Skip to content

Commit 74d2f32

Browse files
Merge pull request #243 from metanorma/table_autolayout_refactoring
Table autolayout refactoring
2 parents d9101ef + 8dd3123 commit 74d2f32

File tree

8 files changed

+168
-44
lines changed

8 files changed

+168
-44
lines changed

Makefile

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ SHELL ?= /bin/bash
66
endif
77

88
#JAR_VERSION := $(shell mvn -q -Dexec.executable="echo" -Dexec.args='$${project.version}' --non-recursive exec:exec -DforceStdout)
9-
JAR_VERSION := 1.86
9+
JAR_VERSION := 1.87
1010
JAR_FILE := mn2pdf-$(JAR_VERSION).jar
1111

1212
all: target/$(JAR_FILE)

README.adoc

+5-5
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,14 @@ You will need the Java Development Kit (JDK) version 8, Update 241 (8u241) or hi
1717

1818
[source,sh]
1919
----
20-
java -Xss5m -Xmx2048m -jar target/mn2pdf-1.86.jar --xml-file <XML-FileName> --xsl-file <XSLT-FileName> --pdf-file <Output-PDF-FileName> [--syntax-highlight]
20+
java -Xss5m -Xmx2048m -jar target/mn2pdf-1.87.jar --xml-file <XML-FileName> --xsl-file <XSLT-FileName> --pdf-file <Output-PDF-FileName> [--syntax-highlight]
2121
----
2222

2323
e.g.
2424

2525
[source,sh]
2626
----
27-
java -Xss5m -Xmx2048m -jar target/mn2pdf-1.86.jar --xml-file tests/G.191.xml --xsl-file tests/itu.recommendation.xsl --pdf-file tests/G.191.pdf
27+
java -Xss5m -Xmx2048m -jar target/mn2pdf-1.87.jar --xml-file tests/G.191.xml --xsl-file tests/itu.recommendation.xsl --pdf-file tests/G.191.pdf
2828
----
2929

3030
=== PDF encryption features
@@ -100,7 +100,7 @@ Update version in `pom.xml`, e.g.:
100100
----
101101
<groupId>org.metanorma.fop</groupId>
102102
<artifactId>mn2pdf</artifactId>
103-
<version>1.86</version>
103+
<version>1.87</version>
104104
<name>Metanorma XML to PDF converter</name>
105105
----
106106

@@ -111,8 +111,8 @@ Tag the same version in Git:
111111

112112
[source,sh]
113113
----
114-
git tag v1.86
115-
git push origin v1.86
114+
git tag v1.87
115+
git push origin v1.87
116116
----
117117

118118
Then the corresponding GitHub release will be automatically created at:

pom.xml

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
<modelVersion>4.0.0</modelVersion>
66
<groupId>org.metanorma.fop</groupId>
77
<artifactId>mn2pdf</artifactId>
8-
<version>1.86</version>
8+
<version>1.87</version>
99
<name>Metanorma XML to PDF converter</name>
1010
<packaging>jar</packaging>
1111
<url>https://www.metanorma.org</url>

src/main/java/org/metanorma/fop/PDFGenerator.java

+82-16
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,7 @@
77
import java.nio.file.Files;
88
import java.nio.file.Path;
99
import java.nio.file.Paths;
10-
import java.util.ArrayList;
11-
import java.util.Arrays;
12-
import java.util.HashMap;
13-
import java.util.Map;
14-
import java.util.Properties;
10+
import java.util.*;
1511
import java.util.logging.Level;
1612
import java.util.logging.Logger;
1713
import javax.xml.parsers.*;
@@ -1193,24 +1189,70 @@ private void setTablesWidths(fontConfig fontcfg, XSLTconverter xsltConverter, Fi
11931189

11941190
SourceXMLDocument sourceXMLDocumentTablesOnly = new SourceXMLDocument(xmlTablesOnly);
11951191

1196-
// transform XML to XSL-FO (XML .fo file)
1197-
xsltConverter.transform(sourceXMLDocumentTablesOnly, false);
1192+
int countTableCells = sourceXMLDocumentTablesOnly.getCountTableCells();
1193+
if (countTableCells < 30000) {
1194+
// transform XML to XSL-FO (XML .fo file)
1195+
xsltConverter.transform(sourceXMLDocumentTablesOnly, false);
11981196

1199-
String xmlFO = sourceXMLDocumentTablesOnly.getXMLFO();
1197+
String xmlFO = sourceXMLDocumentTablesOnly.getXMLFO();
12001198

1201-
//debug
1202-
debugSaveXML(xmlFO, pdf.getAbsolutePath() + ".fo.tables.xml");
1199+
//debug
1200+
debugSaveXML(xmlFO, pdf.getAbsolutePath() + ".fo.tables.xml");
12031201

1204-
fontcfg.outputFontManifestLog(Paths.get(pdf.getAbsolutePath() + ".tables.fontmanifest.log.txt"));
1202+
fontcfg.outputFontManifestLog(Paths.get(pdf.getAbsolutePath() + ".tables.fontmanifest.log.txt"));
12051203

1206-
fontcfg.setSourceDocumentFontList(sourceXMLDocumentTablesOnly.getDocumentFonts());
1204+
fontcfg.setSourceDocumentFontList(sourceXMLDocumentTablesOnly.getDocumentFonts());
12071205

1208-
Source sourceFO = new StreamSource(new StringReader(xmlFO));
1206+
Source sourceFO = new StreamSource(new StringReader(xmlFO));
12091207

1210-
logger.info("[INFO] Generation of Intermediate Format with information about the table's widths ...");
1211-
String xmlIF = generateFOPIntermediateFormat(sourceFO, fontcfg.getConfig(), pdf, true, ".tables");
1208+
logger.info("[INFO] Generation of Intermediate Format with information about the table's widths ...");
1209+
String xmlIF = generateFOPIntermediateFormat(sourceFO, fontcfg.getConfig(), pdf, true, ".tables");
12121210

1213-
xmlTableIF = createTableIF(xmlIF);
1211+
xmlTableIF = createTableIF(xmlIF);
1212+
1213+
} else { // for large tables, or large number of tables
1214+
1215+
List<String> tablesIds = sourceXMLDocumentTablesOnly.readElementsIds("//*[local-name() = 'table' or local-name() = 'dl']");
1216+
1217+
List<String> xmlTablesIF = new ArrayList<>();
1218+
// process each table separately for memory consumption optimization
1219+
int tableCounter = 0;
1220+
int tableCount = tablesIds.size();
1221+
for (String tableId : tablesIds) {
1222+
tableCounter++;
1223+
logger.info("[INFO] Generation of XSL-FO (" + tableCounter + "/" + tableCount + ") with information about the table widths with id='" + tableId + "'...");
1224+
1225+
// process table with id=tableId only
1226+
xsltConverter.setParam("table_only_with_id", tableId);
1227+
1228+
// transform XML to XSL-FO (XML .fo file)
1229+
xsltConverter.transform(sourceXMLDocumentTablesOnly, false);
1230+
1231+
String xmlFO = sourceXMLDocumentTablesOnly.getXMLFO();
1232+
1233+
//debug
1234+
debugSaveXML(xmlFO, pdf.getAbsolutePath() + "." + tableId + ".fo.tables.xml");
1235+
1236+
fontcfg.outputFontManifestLog(Paths.get(pdf.getAbsolutePath() + "." + tableId + ".tables.fontmanifest.log.txt"));
1237+
1238+
fontcfg.setSourceDocumentFontList(sourceXMLDocumentTablesOnly.getDocumentFonts());
1239+
1240+
Source sourceFO = new StreamSource(new StringReader(xmlFO));
1241+
1242+
logger.info("[INFO] Generation of Intermediate Format (" + tableCounter + "/" + tableCount + ") with information about the table's widths with id='" + tableId + "'...");
1243+
String xmlIF = generateFOPIntermediateFormat(sourceFO, fontcfg.getConfig(), pdf, true, "." + tableId + ".tables");
1244+
1245+
xmlTableIF = createTableIF(xmlIF);
1246+
1247+
debugSaveXML(xmlTableIF, pdf.getAbsolutePath() + "." + tableId + ".tables.xml");
1248+
1249+
xmlTableIF = tableWidthsCleanup(xmlTableIF);
1250+
1251+
xmlTablesIF.add(xmlTableIF);
1252+
}
1253+
xmlTableIF = tablesWidthsUnion(xmlTablesIF);
1254+
xsltConverter.setParam("table_only_with_id", ""); // further process all tables
1255+
}
12141256

12151257
debugSaveXML(xmlTableIF, pdf.getAbsolutePath() + ".tables.xml");
12161258

@@ -1261,6 +1303,7 @@ private void debugSaveXML(String xmlString, String pathTo) {
12611303
}
12621304
}
12631305

1306+
12641307
private int getIFPageCount(String xmlIF) {
12651308
int pagecount = 0;
12661309
if (xmlIF != null) {
@@ -1280,4 +1323,27 @@ private void saveDebugFO(String debugXSLFO) {
12801323
}
12811324
}
12821325

1326+
private String tableWidthsCleanup(String table) {
1327+
int startPos = table.indexOf("<table ");
1328+
int endPos = table.indexOf("</tables>");
1329+
table = table.substring(startPos, endPos);
1330+
int startPosTbody = table.indexOf("<tbody>");
1331+
table = table.substring(0,startPosTbody) + "</table>";
1332+
return table;
1333+
}
1334+
1335+
private String tablesWidthsUnion(List<String> tables) {
1336+
StringBuilder sbTablesIF = new StringBuilder();
1337+
if (!tables.isEmpty()) {
1338+
sbTablesIF.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?><tables>");
1339+
}
1340+
for (String itemTableIF: tables) {
1341+
sbTablesIF.append(itemTableIF);
1342+
}
1343+
if (!tables.isEmpty()) {
1344+
sbTablesIF.append("</tables>");
1345+
}
1346+
return sbTablesIF.toString();
1347+
}
1348+
12831349
}

src/main/java/org/metanorma/fop/SourceXMLDocument.java

+50-9
Original file line numberDiff line numberDiff line change
@@ -75,19 +75,13 @@ public SourceXMLDocument(File fXML) {
7575
DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
7676
InputStream xmlstream = new FileInputStream(fXML);
7777
sourceXML = dBuilder.parse(xmlstream);
78-
79-
String element_review = readValue("//*[local-name() = 'review'][1]");
80-
this.hasAnnotations = element_review.length() != 0;
81-
String element_table = readValue("//*[local-name() = 'table' or local-name() = 'dl'][1]");
82-
this.hasTables = element_table.length() != 0;
83-
String element_math = readValue("//*[local-name() = 'math'][1]");
84-
this.hasMath = element_math.length() != 0;
78+
readMetaInformation();
8579
} catch (Exception ex) {
8680
logger.severe("Can't read source XML.");
8781
ex.printStackTrace();
8882
}
8983
}
90-
84+
9185
public SourceXMLDocument(String strXML) {
9286
try {
9387
this.sourceXMLstr = strXML;
@@ -100,7 +94,17 @@ public SourceXMLDocument(String strXML) {
10094
ex.printStackTrace();
10195
}
10296
}
103-
97+
98+
private void readMetaInformation() {
99+
String element_review = readValue("//*[local-name() = 'review'][1]");
100+
this.hasAnnotations = element_review.length() != 0;
101+
String element_table = readValue("//*[local-name() = 'table' or local-name() = 'dl'][1]");
102+
this.hasTables = element_table.length() != 0;
103+
String element_math = readValue("//*[local-name() = 'math'][1]");
104+
this.hasMath = element_math.length() != 0;
105+
}
106+
107+
104108
public StreamSource getStreamSource() {
105109
if (sourceXMLstr.isEmpty()) {
106110
try {
@@ -410,6 +414,38 @@ private String readValue(String xpath) {
410414
return value;
411415
}
412416

417+
private int readTableCellsCount(){
418+
int count = 0;
419+
try {
420+
XPath xPath = XPathFactory.newInstance().newXPath();
421+
XPathExpression query = xPath.compile("//*[local-name() = 'td' or local-name() = 'th' or local-name() = 'dt' or local-name() = 'dd']");
422+
NodeList nodes = (NodeList)query.evaluate(sourceXML, XPathConstants.NODESET);
423+
count = nodes.getLength();
424+
} catch (Exception ex) {
425+
logger.severe(ex.toString());
426+
}
427+
return count;
428+
}
429+
430+
public List<String> readElementsIds(String xpath) {
431+
List<String> values = new ArrayList<>();
432+
try {
433+
XPath xPath = XPathFactory.newInstance().newXPath();
434+
XPathExpression query = xPath.compile(xpath);
435+
NodeList nodes = (NodeList)query.evaluate(sourceXML, XPathConstants.NODESET);
436+
for (int i = 0; i < nodes.getLength(); i++) {
437+
Node node_id = nodes.item(i).getAttributes().getNamedItem("id");
438+
if (node_id != null) {
439+
String id = node_id.getTextContent();
440+
values.add(id);
441+
}
442+
}
443+
} catch (Exception ex) {
444+
logger.severe(ex.toString());
445+
}
446+
return values;
447+
}
448+
413449
public boolean hasAnnotations() {
414450
return hasAnnotations;
415451
}
@@ -423,6 +459,11 @@ public boolean hasMath() {
423459
return hasMath;
424460
}
425461

462+
public int getCountTableCells() {
463+
int countTableCells = readTableCellsCount();
464+
return countTableCells;
465+
}
466+
426467
public void flushResources() {
427468
sourceXML = null;
428469
sourceXMLstr = "";

src/main/java/org/metanorma/fop/Util.java

+20-10
Original file line numberDiff line numberDiff line change
@@ -5,16 +5,7 @@
55
import java.awt.font.TextLayout;
66
import java.awt.geom.Rectangle2D;
77
import java.awt.image.BufferedImage;
8-
import java.io.BufferedReader;
9-
import java.io.BufferedWriter;
10-
import java.io.ByteArrayInputStream;
11-
import java.io.File;
12-
import java.io.FileInputStream;
13-
import java.io.FileOutputStream;
14-
import java.io.FileReader;
15-
import java.io.IOException;
16-
import java.io.InputStream;
17-
import java.io.StringReader;
8+
import java.io.*;
189
import java.net.HttpURLConnection;
1910
import java.net.URI;
2011
import java.net.URISyntaxException;
@@ -61,7 +52,12 @@
6152
import javax.xml.parsers.DocumentBuilderFactory;
6253
import javax.xml.parsers.ParserConfigurationException;
6354
import javax.xml.parsers.SAXParserFactory;
55+
import javax.xml.transform.OutputKeys;
56+
import javax.xml.transform.Transformer;
57+
import javax.xml.transform.TransformerException;
6458
import javax.xml.transform.TransformerFactory;
59+
import javax.xml.transform.dom.DOMSource;
60+
import javax.xml.transform.stream.StreamResult;
6561
import javax.xml.xpath.XPath;
6662
import javax.xml.xpath.XPathConstants;
6763
import javax.xml.xpath.XPathExpression;
@@ -794,4 +790,18 @@ public static Node parseCSS(String cssString) {
794790
}
795791
return node;
796792
}
793+
794+
private static String nodeToString(Node node) {
795+
StringWriter sw = new StringWriter();
796+
try {
797+
Transformer t = TransformerFactory.newInstance().newTransformer();
798+
t.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
799+
t.setOutputProperty(OutputKeys.INDENT, "yes");
800+
t.transform(new DOMSource(node), new StreamResult(sw));
801+
} catch (TransformerException e) {
802+
System.out.println("nodeToString Transformer Exception: " + e.toString());
803+
}
804+
return sw.toString();
805+
}
806+
797807
}

src/main/resources/table_if.xsl

+7-2
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
xmlns:xalan="http://xml.apache.org/xalan"
66
xmlns:java="http://xml.apache.org/xalan/java"
77
xmlns:str="http://exslt.org/strings"
8-
exclude-result-prefixes="java str"
8+
exclude-result-prefixes="fo if xalan java str"
99
version="1.0">
1010

1111
<xsl:output method="xml" encoding="UTF-8" indent="no"/>
@@ -54,7 +54,7 @@
5454
</xsl:variable>
5555

5656
<xsl:variable name="ids" select="xalan:nodeset($ids_)"/>
57-
57+
<!-- <xsl:copy-of select="$ids"/> -->
5858
<tables>
5959
<!-- <xsl:apply-templates select="//if:id[starts-with(@name,$table_if_start_prefix)]"> -->
6060
<xsl:apply-templates select="$ids//if:id[starts-with(@name,$table_if_start_prefix)]">
@@ -136,6 +136,8 @@
136136

137137
<xsl:variable name="cells" select="xalan:nodeset($cells_)"/>
138138

139+
<!-- <xsl:copy-of select="$cells"/> -->
140+
139141
<xsl:variable name="table_body_">
140142
<tbody>
141143
<xsl:for-each select="$cells/cell[generate-id(.) = generate-id(key('kRow', @row)[1])]">
@@ -164,6 +166,7 @@
164166
<xsl:for-each select="$cells">
165167
<xsl:for-each select="key('kRowCell', concat($row, ' ', $col))"> <!-- select all 'cell' relate to one source table cell -->
166168
<!-- <divide><xsl:value-of select="@divide"/></divide> -->
169+
<!-- <length><xsl:value-of select="@length"/></length> -->
167170
<xsl:choose>
168171
<xsl:when test="@type = 'p'">
169172
<p_len><xsl:value-of select="round(@length div @divide)"/></p_len>
@@ -177,6 +180,8 @@
177180
</xsl:variable>
178181
<xsl:variable name="lengths" select="xalan:nodeset($lengths_)"/>
179182

183+
<!-- <xsl:copy-of select="$lengths"/> -->
184+
180185
<xsl:for-each select="$lengths/*">
181186
<xsl:copy>
182187
<xsl:choose>

src/main/resources/tables_only.xsl

+2
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,8 @@
110110

111111
<xsl:template match="*[local-name() = 'table'][@type = 'sourcecode']" priority="2"/>
112112

113+
<xsl:template match="*[local-name() = 'image'][not(ancestor::*[local-name() = 'table']) and not(ancestor::*[local-name() = 'dl'])]" priority="2"/>
114+
113115
<xsl:template match="@*|node()" mode="simple_td">
114116
<xsl:copy>
115117
<xsl:apply-templates select="@*|node()" mode="simple_td"/>

0 commit comments

Comments
 (0)