222059-1734531659fea9

合并多个pdf为一个总的pdf,并生成书签

最近做了一个需求,需要将用户上传的多个pdf文件合并成一个pdf文件,并生成书签。

采用了开源的pdfbox库,写了一个demo如下

1.引入pom依赖

1
2
3
4
5
6
7
8
9
10
11
12
13
<!-- Apache PDFBox: used to merge the PDFs and to build the outline (bookmarks) -->
<dependency>
    <groupId>org.apache.pdfbox</groupId>
    <artifactId>pdfbox</artifactId>
    <version>2.0.27</version>
</dependency>

<!-- fastjson: used to carry the per-file page counts between the two service calls.
     Pinned to 1.2.83 because releases up to 1.2.80 are affected by the autoType
     deserialization bypass described in the vendor security advisory. -->
<dependency>
    <groupId>com.alibaba</groupId>
    <artifactId>fastjson</artifactId>
    <version>1.2.83</version>
</dependency>

2.定义service接口

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
import com.alibaba.fastjson.JSONObject;

import java.io.IOException;

public interface PdfService {

/**
* Merges several PDF files into a single PDF file.
*
* @param outputPath path of the merged PDF file
* @param filePath paths of the PDF files to merge
* @return a JSON object carrying data produced by the merge; presumably meant to be
*         handed to {@link #addBookMark} afterwards (confirm against the implementation)
* @throws IOException declared for I/O failures while processing the files
*/
JSONObject mergeFile(String outputPath, String... filePath) throws IOException;

/**
* Adds bookmarks to a PDF file.
*
* @param json JSON object consumed while building the bookmarks; presumably the value
*             returned by {@link #mergeFile} (confirm against the implementation)
* @param sourcePath path of the PDF file that receives the bookmarks
* @param filePath paths of the PDF files the bookmarks are created for
* @throws IOException declared for I/O failures while processing the file
*/
void addBookMark(JSONObject json, String sourcePath, String... filePath) throws IOException;
}

3.编写实现类

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import com.alibaba.fastjson.JSONObject;
import com.xiaoyun.service.PdfService;
import org.apache.pdfbox.multipdf.PDFMergerUtility;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PageMode;
import org.apache.pdfbox.pdmodel.interactive.documentnavigation.destination.PDPageXYZDestination;
import org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDDocumentOutline;
import org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineItem;
import org.springframework.stereotype.Service;

import java.io.File;
import java.io.IOException;
import java.util.HashMap;

@Service
public class PdfServiceImpl implements PdfService {

    /**
     * Merges the given PDF files, in argument order, into one file written to
     * {@code outputPath}, and records how many pages each source file has.
     *
     * @param outputPath destination file name handed to the merger
     * @param pdfPaths   paths of the PDF files to merge
     * @return a JSON object whose {@code "page"} entry maps each source path to its page count
     * @throws IOException if a source file cannot be loaded or the merge fails
     */
    @Override
    public JSONObject mergeFile(String outputPath, String... pdfPaths) throws IOException {
        // JSON object that carries the result data
        JSONObject json = new JSONObject();
        // Page count of every source file, keyed by the path exactly as it was passed in
        HashMap<String, Integer> pageMap = new HashMap<>();

        // Merge logic
        PDFMergerUtility merger = new PDFMergerUtility();
        merger.setDestinationFileName(outputPath);

        for (String pdf : pdfPaths) {
            File file = new File(pdf);
            merger.addSource(file);

            // Each source is opened briefly just to read its page count
            try (PDDocument doc = PDDocument.load(file)) {
                pageMap.put(pdf, doc.getNumberOfPages());
            }
        }
        merger.mergeDocuments(null);

        json.put("page", pageMap);
        return json;
    }

    /**
     * Adds an outline (bookmarks) to the PDF at {@code sourcePath} and saves the
     * document back to that same path. One bookmark is created per entry of
     * {@code filePath}, pointing at the page where that file's content starts.
     *
     * <p>The start page of each bookmark is the running sum of the page counts of
     * the preceding entries, so {@code filePath} must be in the same order that
     * was used for the merge.
     *
     * @param json       object whose {@code "page"} entry maps each path in
     *                   {@code filePath} to its page count (as produced by {@link #mergeFile})
     * @param sourcePath path of the PDF that receives the bookmarks
     * @param filePath   paths of the merged source files, in merge order
     * @throws IOException              if the document cannot be loaded or saved
     * @throws IllegalArgumentException if the page-count data is missing for a path, or
     *                                  the counts point beyond the end of the document
     */
    @Override
    public void addBookMark(JSONObject json, String sourcePath, String... filePath) throws IOException {
        JSONObject page = json == null ? null : json.getJSONObject("page");
        if (page == null) {
            throw new IllegalArgumentException("json must contain the \"page\" entry produced by mergeFile");
        }
        // Zero-based index of the page the next bookmark points to
        int curPage = 0;

        // Add the bookmarks
        try (PDDocument document = PDDocument.load(new File(sourcePath))) {
            int totalPages = document.getNumberOfPages();

            // Create the outline
            PDDocumentOutline documentOutline = new PDDocumentOutline();
            document.getDocumentCatalog().setDocumentOutline(documentOutline);

            // Create the root outline item that holds one child per source file
            PDOutlineItem pagesOutline = new PDOutlineItem();
            pagesOutline.setTitle("大 纲");
            documentOutline.addLast(pagesOutline);

            for (String path : filePath) {
                Integer pageCount = page.getInteger(path);
                if (pageCount == null) {
                    // Fail with a clear message instead of an NPE from auto-unboxing
                    throw new IllegalArgumentException("No page count recorded for " + path);
                }
                if (curPage >= totalPages) {
                    throw new IllegalArgumentException("Bookmark for " + path + " would start at page index "
                            + curPage + " but " + sourcePath + " only has " + totalPages + " pages");
                }

                PDPageXYZDestination destination = new PDPageXYZDestination();
                destination.setTop(5000);
                destination.setLeft(0);
                destination.setPage(document.getPage(curPage));

                PDOutlineItem bookmark = new PDOutlineItem();
                bookmark.setDestination(destination);
                bookmark.setTitle(bookmarkTitle(path));
                pagesOutline.addLast(bookmark);

                curPage += pageCount;
            }

            pagesOutline.openNode();
            documentOutline.openNode();

            // Ask viewers to show the outline panel when the file is opened
            document.getDocumentCatalog().setPageMode(PageMode.USE_OUTLINES);

            document.save(sourcePath);
        }
    }

    /**
     * Derives a bookmark title from a file path: the file name without its directory
     * and without its last extension. Both {@code /} and {@code \} are treated as
     * separators so Windows-style paths give the same title on every platform.
     */
    private static String bookmarkTitle(String path) {
        int nameStart = Math.max(path.lastIndexOf('/'), path.lastIndexOf('\\')) + 1;
        String fileName = path.substring(nameStart);
        int dot = fileName.lastIndexOf('.');
        // dot > 0 keeps names such as ".hidden" intact
        return dot > 0 ? fileName.substring(0, dot) : fileName;
    }
}

4.测试

在单元测试中写入如下代码

1
2
3
4
5
6
7
8
9
@Test
void contextLoads() {
    // Merged output file and the source PDFs, listed in merge order
    String mergedPdf = "D:\\素材\\result.pdf";
    String[] sourcePdfs = {"D:\\素材\\林俊杰.pdf", "D:\\素材\\周杰伦.pdf", "D:\\素材\\黄霄雲.pdf"};

    try {
        // Merge first, then feed the returned page data into the bookmark step
        JSONObject pageInfo = pdfService.mergeFile(mergedPdf, sourcePdfs);
        pdfService.addBookMark(pageInfo, mergedPdf, sourcePdfs);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}

打开新生成的文件,内容如下:

image-20241221125920711