使用 iText（itextpdf + XMLWorker）解析富文本 HTML 标签
Maven 依赖（注意：xmlworker 与 itextpdf 的版本号建议保持一致）
<dependency>
<groupId>com.itextpdf</groupId>
<artifactId>itextpdf</artifactId>
<version>5.5.9</version>
</dependency>
<dependency>
<groupId>com.itextpdf</groupId>
<artifactId>itext-asian</artifactId>
<version>5.2.0</version>
</dependency>
<dependency>
<groupId>com.itextpdf.tool</groupId>
<artifactId>xmlworker</artifactId>
<version>5.5.10</version>
</dependency>
基于 XMLWorker 解析富文本的工具类
import com.itextpdf.text.*;
import com.itextpdf.tool.xml.ElementList;
import com.itextpdf.tool.xml.XMLWorker;
import com.itextpdf.tool.xml.XMLWorkerFontProvider;
import com.itextpdf.tool.xml.XMLWorkerHelper;
import com.itextpdf.tool.xml.css.CssFile;
import com.itextpdf.tool.xml.css.StyleAttrCSSResolver;
import com.itextpdf.tool.xml.html.CssAppliers;
import com.itextpdf.tool.xml.html.CssAppliersImpl;
import com.itextpdf.tool.xml.html.Tags;
import com.itextpdf.tool.xml.parser.XMLParser;
import com.itextpdf.tool.xml.pipeline.css.CSSResolver;
import com.itextpdf.tool.xml.pipeline.css.CssResolverPipeline;
import com.itextpdf.tool.xml.pipeline.end.ElementHandlerPipeline;
import com.itextpdf.tool.xml.pipeline.html.HtmlPipeline;
import com.itextpdf.tool.xml.pipeline.html.HtmlPipelineContext;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Helper that turns a rich-text HTML fragment into a list of iText elements by running it
 * through an XMLWorker pipeline (CSS resolver -> HTML pipeline -> element collector).
 *
 * <p>Before parsing, the HTML is patched to work around three observed problems: whitespace
 * between list tags, {@code <sub>}/{@code <sup>} tags without an explicit style, and unclosed
 * {@code <br>}/{@code <hr>} tags.
 */
public class MyXMLWorkerHelper {

    /**
     * Captures the whitespace that sits between list tags: either between an opening
     * {@code <ol>/<ul>} and the following {@code <li} (group 1), or between a closing
     * {@code </li>} and the next {@code <li} or closing {@code </ol>/</ul>} (group 2).
     * Compiled once because the expression never changes.
     */
    private static final Pattern LIST_WHITESPACE = Pattern.compile(
            "<[ou]l.*>(\\s+)<li|</li>(\\s+)<((li)|(/[ou]l>))",
            Pattern.CASE_INSENSITIVE | Pattern.MULTILINE);

    /**
     * Font provider that substitutes a default font name when the requested name is
     * {@code null}; every other lookup is delegated unchanged to the superclass.
     */
    public static class MyFontsProvider extends XMLWorkerFontProvider {

        /** Creates the provider with neither a font path nor a substitution map. */
        public MyFontsProvider() {
            // NOTE(review): what the superclass registers for a null font path is not visible
            // here; if it scans font directories, creating a provider per parse call may be
            // costly - confirm before caching.
            super(null, null);
        }

        /**
         * Looks up a font, falling back to the font named "宋体" when no name is given.
         *
         * @param fontname requested font name, may be {@code null}
         * @param encoding font encoding passed through to the superclass
         * @param size     font size passed through to the superclass
         * @param style    font style flags passed through to the superclass
         * @return the font resolved by the superclass for the effective name
         */
        @Override
        public Font getFont(final String fontname, String encoding, float size, final int style) {
            String fntname = fontname;
            if (fntname == null) {
                fntname = "宋体";
            }
            return super.getFont(fntname, encoding, size, style);
        }
    }

    /**
     * Parses an HTML fragment into iText elements.
     *
     * @param html rich-text HTML to parse; {@code null} or empty yields an empty list
     * @param css  optional stylesheet text applied in addition to inline styles, may be
     *             {@code null}
     * @return the elements collected by the pipeline, in the order they were produced
     * @throws IOException if the parser fails while reading the input
     */
    public static ElementList parseToElementList(String html, String css) throws IOException {
        ElementList elements = new ElementList();
        // Nothing to parse: avoid a NullPointerException on a missing rich-text value.
        if (html == null || html.isEmpty()) {
            return elements;
        }

        // CSS
        CSSResolver cssResolver = new StyleAttrCSSResolver();
        if (css != null) {
            // NOTE(review): the byte encoding of the stylesheet is left at the platform default
            // because how getCSS decodes the stream is not visible here - confirm before pinning.
            CssFile cssFile = XMLWorkerHelper.getCSS(new ByteArrayInputStream(css.getBytes()));
            cssResolver.addCss(cssFile);
        }

        // HTML
        MyFontsProvider fontProvider = new MyFontsProvider();
        CssAppliers cssAppliers = new CssAppliersImpl(fontProvider);
        HtmlPipelineContext htmlContext = new HtmlPipelineContext(cssAppliers);
        htmlContext.setTagFactory(Tags.getHtmlTagProcessorFactory());
        htmlContext.autoBookmark(false);

        // Pipelines: CSS resolution feeds the HTML pipeline, which feeds the element collector.
        ElementHandlerPipeline end = new ElementHandlerPipeline(elements, null);
        HtmlPipeline htmlPipeline = new HtmlPipeline(htmlContext, end);
        CssResolverPipeline cssPipeline = new CssResolverPipeline(cssResolver, htmlPipeline);

        // XML Worker
        XMLWorker worker = new XMLWorker(cssPipeline, true);
        XMLParser p = new XMLParser(worker);

        String prepared = prepareHtml(html);
        // Encode and decode with the same explicit charset so non-ASCII text (e.g. Chinese)
        // survives regardless of the platform default encoding.
        p.parse(new ByteArrayInputStream(prepared.getBytes(StandardCharsets.UTF_8)),
                StandardCharsets.UTF_8);
        // An alternative to stripping list whitespace up front would be to post-filter the
        // parsed lists and drop items whose only content is "\n"; that approach is not used.
        return elements;
    }

    /** Applies the pre-parse HTML workarounds and returns the patched markup. */
    private static String prepareHtml(String html) {
        // Fix: line breaks inside ul/ol would otherwise be parsed as extra li items.
        StringBuilder buffer = new StringBuilder(html);
        Matcher matcher = LIST_WHITESPACE.matcher(buffer);
        while (matcher.find()) {
            // Exactly one of the two capture groups participates in any match.
            int group = matcher.group(1) != null ? 1 : 2;
            buffer.delete(matcher.start(group), matcher.end(group));
            // Restart from the beginning: the greedy ".*" can skip earlier whitespace runs
            // on the same line, so a single forward pass would miss them.
            matcher.reset(buffer);
        }

        // Fix: sub/sup tags have no effect unless the style is added explicitly.
        String patched = buffer.toString()
                .replace("<sub>", "<sub style=\"vertical-align: sub;\">")
                .replace("<sup>", "<sup style=\"vertical-align: super;\">");

        // Fix: XMLWorker parses the input as XML, so unclosed single HTML tags are rejected.
        return patched.replace("<br>", "<br></br>").replace("<hr>", "<hr></hr>");
    }
}
在代码中使用
// Build a top-left aligned cell, fill it with the elements parsed from the rich-text HTML
// (no extra stylesheet), then add the cell to the table and the table to the document.
PdfPCell cell1 = new PdfPCell();
cell1.setVerticalAlignment(Element.ALIGN_TOP);
cell1.setHorizontalAlignment(Element.ALIGN_LEFT);
MyXMLWorkerHelper.parseToElementList(projectInfo.getCompAbout(), null)
        .forEach(cell1::addElement);
table.addCell(cell1);
document.add(table);