diff --git a/CHANGES b/CHANGES index 638ca4a369..26522f4c70 100644 --- a/CHANGES +++ b/CHANGES @@ -74,6 +74,9 @@ jsoup changelog * Bugfix [Fuzz]: Fix a potential stack-overflow in the parser given crafted HTML, when the parser looped in the InSelectInTable state. + * Bugfix [Fuzz]: Fix an IndexOutOfBoundsException (IOOB) when the HTML root was cleared from the stack and then attributes were merged onto it. + + *** Release 1.14.1 [2021-Jul-10] * Change: updated the minimum supported Java version from Java 7 to Java 8. diff --git a/src/main/java/org/jsoup/parser/HtmlTreeBuilderState.java b/src/main/java/org/jsoup/parser/HtmlTreeBuilderState.java index e932a4a88b..7f63ea2dab 100644 --- a/src/main/java/org/jsoup/parser/HtmlTreeBuilderState.java +++ b/src/main/java/org/jsoup/parser/HtmlTreeBuilderState.java @@ -340,12 +340,15 @@ private boolean inBodyStartTag(Token t, HtmlTreeBuilder tb) { break; case "html": tb.error(this); - // merge attributes onto real html - Element html = tb.getStack().get(0); - if (startTag.hasAttributes()) { - for (Attribute attribute : startTag.attributes) { - if (!html.hasAttr(attribute.getKey())) - html.attributes().put(attribute); + // merge attributes onto real html (if present) + stack = tb.getStack(); + if (stack.size() > 0) { + Element html = tb.getStack().get(0); + if (startTag.hasAttributes()) { + for (Attribute attribute : startTag.attributes) { + if (!html.hasAttr(attribute.getKey())) + html.attributes().put(attribute); + } } } break; diff --git a/src/test/java/org/jsoup/integration/FuzzFixesTest.java b/src/test/java/org/jsoup/integration/FuzzFixesTest.java index c203b1f79c..5d7bcc8576 100644 --- a/src/test/java/org/jsoup/integration/FuzzFixesTest.java +++ b/src/test/java/org/jsoup/integration/FuzzFixesTest.java @@ -193,4 +193,16 @@ public void overflow1607() throws IOException { Document docXml = Jsoup.parse(new FileInputStream(in), "UTF-8", "https://example.com", Parser.xmlParser()); assertNotNull(docXml); } + + @Test + public void oob() throws IOException { 
+ // https://github.com/jhy/jsoup/issues/1611 + File in = ParseTest.getFile("/fuzztests/1611.html.gz"); + + Document doc = Jsoup.parse(in, "UTF-8"); + assertNotNull(doc); + + Document docXml = Jsoup.parse(new FileInputStream(in), "UTF-8", "https://example.com", Parser.xmlParser()); + assertNotNull(docXml); + } } diff --git a/src/test/resources/fuzztests/1611.html.gz b/src/test/resources/fuzztests/1611.html.gz new file mode 100644 index 0000000000..90215d0a84 Binary files /dev/null and b/src/test/resources/fuzztests/1611.html.gz differ