使用 Apache POI 和 iTextPDF 将 Word 转换为 PDF 需要分两步操作:先用 POI 读取 Word 内容,再用 iText 生成 PDF。
Apache POI 官方文档:Apache POI™ - Javadocs(https://poi.apache.org/apidocs/index.html)
以下是详细的代码实现示例:
环境准备
在 pom.xml 中添加依赖:
<!-- 注意:${poi.version} 需要在 pom.xml 的 properties 节点中定义 -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>${poi.version}</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-scratchpad</artifactId>
    <version>${poi.version}</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>${poi.version}</version>
</dependency>
<dependency>
    <groupId>com.itextpdf</groupId>
    <artifactId>html2pdf</artifactId>
    <version>3.0.2</version>
</dependency>
<dependency>
    <groupId>com.itextpdf</groupId>
    <artifactId>itextpdf</artifactId>
    <version>5.5.6</version>
</dependency>
<dependency>
    <groupId>com.itextpdf</groupId>
    <artifactId>itext-asian</artifactId>
    <version>5.2.0</version>
</dependency>
完整代码示例
1.入口类
package org.example;import com.itextpdf.text.Document;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.pdf.PdfWriter;
import org.apache.poi.xwpf.usermodel.IBodyElement;
import org.apache.poi.xwpf.usermodel.XWPFDocument;import java.io.*;
import java.util.List;/*** 用poi和itextpdf实现docx转pdf方法** @author lyl* @version v1.0* @since 2025/4/11*/
public class Main {public static void main(String[] args) throws IOException {// 打开 docx 文件FileInputStream fis = new FileInputStream("D:\\testaa\\11.docx");XWPFDocument doc=new XWPFDocument(fis);//处理bodyList<IBodyElement> bodyElementList=doc.getBodyElements();Document pdfDoc = createPdf("D:\\testaa\\22.pdf");try {BodyElementUtil.createBody(bodyElementList,pdfDoc);} catch (DocumentException e) {throw new RuntimeException(e);}doc.close();fis.close();}/*** 生成pdf文档** @param pdfFilePath* @return*/public static Document createPdf(String pdfFilePath) {try {// 创建 PDF 文档Document pdfDoc = new Document();File htmlFile = new File(pdfFilePath);if (!htmlFile.exists()) {String dic = htmlFile.getParent();if (!new File(dic).exists()) {new File(dic).mkdirs();}htmlFile.createNewFile();}PdfWriter.getInstance(pdfDoc, new FileOutputStream(pdfFilePath));pdfDoc.open();return pdfDoc;} catch (Exception e) {e.printStackTrace();}return null;}
}
2.处理body元素的工具类
package org.example;import com.itextpdf.text.Document;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.Paragraph;
import com.itextpdf.text.pdf.PdfPCell;
import com.itextpdf.text.pdf.PdfPTable;
import org.apache.poi.xwpf.usermodel.IBodyElement;import org.apache.poi.xwpf.usermodel.XWPFTable;import java.util.List;/*** 处理body元素的工具类** @author lyl* @version v1.0* @since 2025/4/11*/
/**
 * Writes the body elements of a Word document into an iText PDF document.
 */
public class BodyElementUtil {

    /**
     * Converts each body element to its iText counterpart, adds it to the PDF in
     * document order and finally closes the PDF document.
     *
     * <p>Paragraph elements are delegated to {@code ParagraphUtil}, table
     * elements to {@code TableUtil}; every other element type is ignored.
     *
     * @param bodyElementList body elements of the Word document, in order
     * @param pdfDoc          opened PDF document; it is closed by this method,
     *                        also when the conversion fails
     * @throws DocumentException if iText rejects one of the converted elements
     */
    public static void createBody(List<IBodyElement> bodyElementList, Document pdfDoc) throws DocumentException {
        try {
            for (IBodyElement bodyElement : bodyElementList) {
                System.out.println(bodyElement.getElementType().name());
                switch (bodyElement.getElementType()) {
                    case PARAGRAPH:
                        for (Paragraph paragraph : ParagraphUtil.createParagraph(bodyElement)) {
                            // Guard against a converter that yields no paragraph.
                            if (paragraph != null) {
                                pdfDoc.add(paragraph);
                            }
                        }
                        break;
                    case TABLE:
                        // Convert only THIS table. Using bodyElement.getBody().getTables()
                        // here would re-render every table of the body once per table element.
                        if (bodyElement instanceof XWPFTable) {
                            pdfDoc.add(wrapTable(TableUtil.createTable((XWPFTable) bodyElement)));
                        }
                        break;
                    default:
                        break;
                }
            }
        } catch (DocumentException | RuntimeException failure) {
            // Still close the document so the underlying output is released; a
            // secondary close error must not hide the original failure.
            try {
                pdfDoc.close();
            } catch (RuntimeException closeFailure) {
                failure.addSuppressed(closeFailure);
            }
            throw failure;
        }
        pdfDoc.close();
        System.out.println("结束:");
    }

    /** Wraps a converted table in a single borderless cell of a one-column table. */
    private static PdfPTable wrapTable(PdfPTable pdfTable) {
        PdfPTable wrapper = new PdfPTable(1);
        PdfPCell cell = new PdfPCell(pdfTable);
        cell.setBorder(0);
        wrapper.addCell(cell);
        return wrapper;
    }
}
3.处理段落:读取 Word 中的字号和字体类型,并转换为 PDF 中对应的字体和字号
package org.example;import com.itextpdf.text.*;
import com.itextpdf.text.pdf.BaseFont;
import com.itextpdf.text.pdf.PdfPCell;
import org.apache.poi.xwpf.usermodel.*;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTcBorders;import java.io.IOException;
import java.util.ArrayList;
import java.util.List;/*** 段落文本处理类** @author lyl* @version v1.0* @since 2025/4/11*/
public class ParagraphUtil {public static List<Paragraph> createParagraph(IBodyElement bodyElement) {System.out.println("创建段落");List<XWPFParagraph> paragraphs = bodyElement.getBody().getParagraphs();List<Paragraph> paragraphsList = new ArrayList<>();for (int i = 0; i < paragraphs.size(); i++) {Paragraph p = new Paragraph();XWPFParagraph paragraph = paragraphs.get(i);System.out.println("段落内容:" + paragraph.getText());System.out.println("part:" + paragraph.getPart().getPackagePart().getContentType());System.out.println("part name:" + paragraph.getPart().getPackagePart().getPartName());ParagraphAlignment alignment = paragraph.getAlignment();System.out.println("段落对齐方式 name:" + alignment.name());System.out.println("段落对齐方式 value:" + alignment.getValue());System.out.println("style" + paragraph.getStyle());System.out.println("文字对齐方式" + paragraph.getFontAlignment());paragraphsList.add(getPhase(paragraph));}return paragraphsList;}/*** 单元格解析** @param paragraph* @return*/private static Paragraph getPhase(XWPFParagraph paragraph) {Paragraph pa = new Paragraph();// 获取段落对齐方式ParagraphAlignment alignment = paragraph.getAlignment();// 遍历段落中的所有文本for (XWPFRun run : paragraph.getRuns()) {if (null == run) {continue;}//设置图片List<Image> images = setPicture(run);if (null != images) {for (Image image : images) {pa.add(image);}}if (null == run.getText(0)) {return null;}Chunk chunk = new Chunk(run.getText(0), setFont(run));pa.add(chunk);}if (alignment.getValue() == ParagraphAlignment.CENTER.getValue()) {pa.setAlignment(Element.ALIGN_CENTER);} else if (alignment.getValue() == ParagraphAlignment.RIGHT.getValue()) {pa.setAlignment(Element.ALIGN_RIGHT);} else if (alignment.getValue() == ParagraphAlignment.LEFT.getValue()) {pa.setAlignment(Element.ALIGN_LEFT);}return pa;}/*** 设置图片** @param run*/private static List<Image> setPicture(XWPFRun run) {if (null != run.getEmbeddedPictures() && run.getEmbeddedPictures().size() > 0) {List<Image> imagelist = new ArrayList<>();for (XWPFPicture pic : 
run.getEmbeddedPictures()) {try {
// Paragraph p = new Paragraph();
// p.add(getimage(pic.getPictureData()));
// p.setAlignment(Element.ALIGN_CENTER);Image image = getimage(pic.getPictureData());imagelist.add(image);} catch (BadElementException e) {throw new RuntimeException(e);} catch (IOException e) {throw new RuntimeException(e);}}return imagelist;}return null;}/*** 设置字体** @param run* @return*/private static Font setFont(XWPFRun run) {try {BaseFont bf = null;bf = BaseFont.createFont("STSong-Light", "UniGB-UCS2-H", BaseFont.NOT_EMBEDDED);Font font = null;if (run.isBold()) {font = new Font(bf, run.getFontSize(), Font.BOLD, BaseColor.BLACK);} else {font = new Font(bf, run.getFontSize(), Font.NORMAL, BaseColor.BLACK);if (null != run.getFontFamily() && run.getFontFamily().equals("黑体")) {//设置为黑体font = new Font(bf, run.getFontSize(), Font.BOLD, BaseColor.BLACK);}}return font;} catch (DocumentException e) {throw new RuntimeException(e);} catch (IOException e) {throw new RuntimeException(e);}}/*** 读取图片** @param picdata* @return* @throws BadElementException* @throws IOException*/private static Image getimage(XWPFPictureData picdata) throws BadElementException, IOException {byte[] bytepic = picdata.getData();Image imag = Image.getInstance(bytepic);return imag;}}
4.处理表格,包括行合并、列合并,以及表格中包含图片的情况
package org.example;import com.itextpdf.text.*;
import com.itextpdf.text.pdf.BaseFont;
import com.itextpdf.text.pdf.PdfPCell;
import com.itextpdf.text.pdf.PdfPTable;
import org.apache.poi.xwpf.usermodel.*;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTcBorders;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTcPr;import java.io.IOException;
import java.util.HashSet;
import java.util.Set;/*** 表格文本处理类** @author lyl* @version v1.0* @since 2025/4/11*/
public class TableUtil {public static PdfPTable createTable(XWPFTable doctable) {// 获取表格的列数int cols = doctable.getRow(0).getTableCells().size();PdfPTable allTable=new PdfPTable(1);for (int i = 0; i < doctable.getRows().size(); i++) {XWPFTableRow row = doctable.getRows().get(i);//如果这一行都没有边框样式,则合并单元格int noBorder = 0;Set<PdfPCell> cells = new HashSet<>();for (XWPFTableCell cell : row.getTableCells()) {// 获取单元格背景颜色CTTcPr cellPr = cell.getCTTc().getTcPr();CTTcBorders cellBorders = null;if (null != cellPr) {// 获取单元格边框cellBorders = cellPr.getTcBorders();}PdfPCell pdfCell = getPhase(cell);if (null == pdfCell) {pdfCell = new PdfPCell(new Paragraph(cell.getText()));}if (cellBorders != null) {pdfCell = setBorder(pdfCell, cellBorders);} else {pdfCell.setBorder(0);}if (cellBorders == null) {noBorder++;}cells.add(pdfCell);}PdfPTable ptable = new PdfPTable(cells.size());//检 查是否需要合并if (noBorder == cells.size()) {//从第一格开始合并单元格int number=0;for (PdfPCell cell : cells) {if(number==0){//合并列格cell.setColspan(cols);//合并行//cell.setRowspan(2);}ptable.addCell(cell);}} else {for (PdfPCell cell : cells) {ptable.addCell(cell);}}PdfPCell lastCell= new PdfPCell(ptable);lastCell.setBorder(0);allTable.addCell(lastCell);}return allTable;}/*** 单元格解析** @param cell* @return*/private static PdfPCell getPhase(XWPFTableCell cell) {PdfPCell cell1 = new PdfPCell();// 遍历单元格中的所有段落for (XWPFParagraph paragraph : cell.getParagraphs()) {Paragraph pdfParaghs = new Paragraph(paragraph.getText());// 获取段落对齐方式ParagraphAlignment alignment = paragraph.getAlignment();if (alignment.getValue() == ParagraphAlignment.CENTER.getValue()) {pdfParaghs.setAlignment(Element.ALIGN_CENTER);} else if (alignment.getValue() == ParagraphAlignment.RIGHT.getValue()) {pdfParaghs.setAlignment(Element.ALIGN_RIGHT);} else if (alignment.getValue() == ParagraphAlignment.LEFT.getValue()) {pdfParaghs.setAlignment(Element.ALIGN_LEFT);}// 遍历段落中的所有文本for (XWPFRun run : paragraph.getRuns()) {if (null == run) {continue;}//设置图片Set<Image> images = 
setPicture(run);if (null != images) {for (Image image : images) {cell1.addElement(image);}} else if (null == run.getText(0)) {// pdfParaghs.add(new Paragraph(""));cell1.addElement(new Paragraph(""));} else {Chunk chunk = new Chunk(run.getText(0), setFont(run));// pdfParaghs.add(chunk);cell1.addElement(chunk);}}}return cell1;}/*** 设置字体样式连框** @param cellBorders* @return* @throws DocumentException* @throws IOException*/private static PdfPCell setBorder(PdfPCell cell1, CTTcBorders cellBorders) {if (null == cell1) {cell1 = new PdfPCell();}cell1.setBorder(0);if (null == cellBorders) {return cell1;}//System.out.println("text:::"+text);//printCellBorder( cellBorders);if (null != cellBorders.getBottom()) {cell1.setBorderColorBottom(BaseColor.BLACK);cell1.setBorderWidthBottom(1);}if (null != cellBorders.getTop()) {cell1.setBorderColorTop(BaseColor.BLACK);cell1.setBorderWidthTop(1);}if (null != cellBorders.getLeft()) {cell1.setBorderColorLeft(BaseColor.BLACK);cell1.setBorderWidthLeft(1);}if (null != cellBorders.getRight()) {cell1.setBorderWidthRight(1);cell1.setBorderColorRight(BaseColor.BLACK);}return cell1;}/*** 设置图片** @param run*/private static Set<Image> setPicture(XWPFRun run) {if (null != run.getEmbeddedPictures() && run.getEmbeddedPictures().size() > 0) {Set<Image> images = new HashSet<>();for (XWPFPicture pic : run.getEmbeddedPictures()) {try {
// Paragraph p = new Paragraph();
// p.add(getimage(pic.getPictureData()));
// p.setAlignment(Element.ALIGN_CENTER);images.add(getimage(pic.getPictureData()));} catch (BadElementException e) {throw new RuntimeException(e);} catch (IOException e) {throw new RuntimeException(e);}}return images;}return null;}/*** 设置字体** @param run* @return*/private static Font setFont(XWPFRun run) {try {BaseFont bf = null;bf = BaseFont.createFont("STSong-Light", "UniGB-UCS2-H", BaseFont.NOT_EMBEDDED);Font font = null;if (run.isBold()) {font = new Font(bf, run.getFontSize(), Font.BOLD, BaseColor.BLACK);} else {font = new Font(bf, run.getFontSize(), Font.NORMAL, BaseColor.BLACK);if (null != run.getFontFamily() && run.getFontFamily().equals("黑体")) {//设置为黑体font = new Font(bf, run.getFontSize(), Font.BOLD, BaseColor.BLACK);}}return font;} catch (DocumentException e) {throw new RuntimeException(e);} catch (IOException e) {throw new RuntimeException(e);}}/*** 读取图片** @param picdata* @return* @throws BadElementException* @throws IOException*/private static Image getimage(XWPFPictureData picdata) throws BadElementException, IOException {byte[] bytepic = picdata.getData();Image imag = Image.getInstance(bytepic);return imag;}
}