diff --git a/Source/WebCore/ChangeLog b/Source/WebCore/ChangeLog index fdc1c44b35ba9..395d1de6c3064 100644 --- a/Source/WebCore/ChangeLog +++ b/Source/WebCore/ChangeLog @@ -1,3 +1,28 @@ +2016-10-11 Alex Christensen + + URLParser should percent-encode non-ASCII and non-printable characters in fragment + https://bugs.webkit.org/show_bug.cgi?id=163287 + + Reviewed by Brady Eidson. + + Based on discussion in https://github.com/whatwg/url/issues/150 + If that discussion decides to keep the spec as-is (which keeps non-ASCII characters in the fragment + to match IE and Edge's behavior, which Chrome has followed for special schemes) then we can revert + this change later after enabling the URL parser. Making this change keeps behavior matching Safari + and Firefox, as well as Chrome's handling of non-special schemes, such as data URLs. + + Covered by updated API tests. + + * platform/URLParser.cpp: + (WebCore::URLParser::appendToASCIIBuffer): + (WebCore::URLParser::copyURLPartsUntil): + (WebCore::URLParser::syntaxViolation): + (WebCore::URLParser::currentPosition): + (WebCore::URLParser::parse): + (WebCore::URLParser::fragmentSyntaxViolation): Deleted. + * platform/URLParser.h: + No more non-ASCII characters in canonicalized URLs. + 2016-10-11 Alex Christensen Remove dead networking code diff --git a/Source/WebCore/platform/URLParser.cpp b/Source/WebCore/platform/URLParser.cpp index 9475d39bd43e5..859eda7ef916f 100644 --- a/Source/WebCore/platform/URLParser.cpp +++ b/Source/WebCore/platform/URLParser.cpp @@ -462,7 +462,6 @@ ALWAYS_INLINE bool URLParser::isWindowsDriveLetter(CodePointIterator& iterator m_didSeeSyntaxViolation = true; ASSERT(m_asciiBuffer.isEmpty()); - ASSERT(m_unicodeFragmentBuffer.isEmpty()); size_t codeUnitsToCopy = iterator.codeUnitsSince(reinterpret_cast(m_inputBegin)); RELEASE_ASSERT(codeUnitsToCopy <= m_inputString.length()); m_asciiBuffer.reserveCapacity(m_inputString.length()); @@ -1028,30 +1024,6 @@ void URLParser::syntaxViolation(const CodePointIterator& iterator } } -template -void URLParser::fragmentSyntaxViolation(const CodePointIterator& iterator) -{ - ASSERT(m_didSeeUnicodeFragmentCodePoint); - if (m_didSeeSyntaxViolation) - return; - m_didSeeSyntaxViolation = true; - - ASSERT(m_asciiBuffer.isEmpty()); - ASSERT(m_unicodeFragmentBuffer.isEmpty()); - size_t codeUnitsToCopy = iterator.codeUnitsSince(reinterpret_cast(m_inputBegin)); - size_t asciiCodeUnitsToCopy = m_url.m_queryEnd; - size_t unicodeCodeUnitsToCopy = codeUnitsToCopy - asciiCodeUnitsToCopy; - RELEASE_ASSERT(codeUnitsToCopy <= m_inputString.length()); - m_asciiBuffer.reserveCapacity(asciiCodeUnitsToCopy); - for (size_t i = 0; i < asciiCodeUnitsToCopy; ++i) { - ASSERT(isASCII(m_inputString[i])); - m_asciiBuffer.uncheckedAppend(m_inputString[i]); - } - m_unicodeFragmentBuffer.reserveCapacity(m_inputString.length() - asciiCodeUnitsToCopy); - for (size_t i = asciiCodeUnitsToCopy; i < asciiCodeUnitsToCopy + unicodeCodeUnitsToCopy; ++i) - m_unicodeFragmentBuffer.uncheckedAppend(m_inputString[i]); -} - void URLParser::failure() { m_url.invalidate(); @@ -1111,10 +1083,8 @@ ALWAYS_INLINE StringView URLParser::parsedDataView(size_t start, size_t length) template ALWAYS_INLINE size_t URLParser::currentPosition(const CodePointIterator& iterator) { - if (UNLIKELY(m_didSeeSyntaxViolation)) { - ASSERT(m_unicodeFragmentBuffer.isEmpty()); + if (UNLIKELY(m_didSeeSyntaxViolation)) return m_asciiBuffer.size(); - } return iterator.codeUnitsSince(reinterpret_cast(m_inputBegin)); } @@ -1160,7 +1130,6 @@ void URLParser::parse(const CharacterType* input, const unsigned length, const U URL_PARSER_LOG("Parsing URL <%s> base <%s> encoding <%s>", String(input, length).utf8().data(), base.string().utf8().data(), encoding.name()); m_url = { }; ASSERT(m_asciiBuffer.isEmpty()); - ASSERT(m_unicodeFragmentBuffer.isEmpty()); bool isUTF8Encoding = encoding == UTF8Encoding(); Vector queryBuffer; @@ -1811,35 +1780,9 @@ void URLParser::parse(const CharacterType* input, const unsigned length, const U } while (!c.atEnd()); break; case State::Fragment: - do { - URL_PARSER_LOG("State Fragment"); - if (!c.atEnd() && isTabOrNewline(*c)) { - if (m_didSeeUnicodeFragmentCodePoint) - fragmentSyntaxViolation(c); - else - syntaxViolation(c); - ++c; - continue; - } - if (!m_didSeeUnicodeFragmentCodePoint && isASCII(*c)) { - if (m_urlIsSpecial) - appendToASCIIBuffer(*c); - else - utf8PercentEncode(c); - } else { - if (m_urlIsSpecial) { - m_didSeeUnicodeFragmentCodePoint = true; - if (UNLIKELY(m_didSeeSyntaxViolation)) - appendCodePoint(m_unicodeFragmentBuffer, *c); - else { - ASSERT(m_asciiBuffer.isEmpty()); - ASSERT(m_unicodeFragmentBuffer.isEmpty()); - } - } else - utf8PercentEncode(c); - } - ++c; - } while (!c.atEnd()); + URL_PARSER_LOG("State Fragment"); + utf8PercentEncode(c); + ++c; break; } } @@ -2043,28 +1986,16 @@ void URLParser::parse(const CharacterType* input, const unsigned length, const U m_url.m_fragmentEnd = m_url.m_queryEnd; break; case State::Fragment: - { - LOG_FINAL_STATE("Fragment"); - size_t length = m_didSeeSyntaxViolation ? m_asciiBuffer.size() + m_unicodeFragmentBuffer.size() : c.codeUnitsSince(reinterpret_cast(m_inputBegin)); - m_url.m_fragmentEnd = length; - break; - } + LOG_FINAL_STATE("Fragment"); + m_url.m_fragmentEnd = currentPosition(c); + break; } if (LIKELY(!m_didSeeSyntaxViolation)) { m_url.m_string = m_inputString; ASSERT(m_asciiBuffer.isEmpty()); - ASSERT(m_unicodeFragmentBuffer.isEmpty()); - } else if (!m_didSeeUnicodeFragmentCodePoint) { - ASSERT(m_unicodeFragmentBuffer.isEmpty()); + } else m_url.m_string = String::adopt(WTFMove(m_asciiBuffer)); - } else { - Vector buffer; - buffer.reserveInitialCapacity(m_asciiBuffer.size() + m_unicodeFragmentBuffer.size()); - buffer.appendVector(m_asciiBuffer); - buffer.appendVector(m_unicodeFragmentBuffer); - m_url.m_string = String::adopt(WTFMove(buffer)); - } m_url.m_isValid = true; URL_PARSER_LOG("Parsed URL <%s>", m_url.m_string.utf8().data()); } diff --git a/Source/WebCore/platform/URLParser.h b/Source/WebCore/platform/URLParser.h index a61d16a731c01..a7b24616dca65 100644 --- a/Source/WebCore/platform/URLParser.h +++ b/Source/WebCore/platform/URLParser.h @@ -51,8 +51,6 @@ class URLParser { private: URL m_url; Vector m_asciiBuffer; - Vector m_unicodeFragmentBuffer; - bool m_didSeeUnicodeFragmentCodePoint { false }; bool m_urlIsSpecial { false }; bool m_hostHasPercentOrNonASCII { false }; String m_inputString; @@ -73,7 +71,6 @@ class URLParser { void advance(CodePointIterator&, const CodePointIterator& iteratorForSyntaxViolationPosition); template bool takesTwoAdvancesUntilEnd(CodePointIterator); template void syntaxViolation(const CodePointIterator&); - template void fragmentSyntaxViolation(const CodePointIterator&); template bool isPercentEncodedDot(CodePointIterator); template bool isWindowsDriveLetter(CodePointIterator); template bool isSingleDotPathSegment(CodePointIterator); diff --git a/Tools/ChangeLog b/Tools/ChangeLog index 5ff4ebcc25897..ba312b4ec03cf 100644 --- a/Tools/ChangeLog +++ b/Tools/ChangeLog @@ -1,3 +1,13 @@ +2016-10-11 Alex Christensen + + URLParser should percent-encode non-ASCII and non-printable characters in fragment + https://bugs.webkit.org/show_bug.cgi?id=163287 + + Reviewed by Brady Eidson. + + * TestWebKitAPI/Tests/WebCore/URLParser.cpp: + (TestWebKitAPI::TEST_F): + 2016-10-11 Alex Christensen Remove dead networking code diff --git a/Tools/TestWebKitAPI/Tests/WebCore/URLParser.cpp b/Tools/TestWebKitAPI/Tests/WebCore/URLParser.cpp index ef04f4c33ab7f..0ebc81a7e394f 100644 --- a/Tools/TestWebKitAPI/Tests/WebCore/URLParser.cpp +++ b/Tools/TestWebKitAPI/Tests/WebCore/URLParser.cpp @@ -331,6 +331,10 @@ TEST_F(URLParserTest, Basic) checkURL("aA://", {"aa", "", "", "", 0, "//", "", "", "aa://"}); checkURL(utf16String(u"foo://host/#ŠŸŠŸ\u0007 a