org.apache.httpcomponents
httpclient
diff --git a/src/main/java/org/apache/lucene/analysis/miscellaneous/XASCIIFoldingFilter.java b/src/main/java/org/apache/lucene/analysis/miscellaneous/XASCIIFoldingFilter.java
deleted file mode 100644
index 7d6614adfe078..0000000000000
--- a/src/main/java/org/apache/lucene/analysis/miscellaneous/XASCIIFoldingFilter.java
+++ /dev/null
@@ -1,2096 +0,0 @@
-package org.apache.lucene.analysis.miscellaneous;
-
-/*
- * Licensed to Elasticsearch under one or more contributor
- * license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright
- * ownership. Elasticsearch licenses this file to you under
- * the Apache License, Version 2.0 (the "License"); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-import org.apache.lucene.analysis.TokenFilter;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.util.ArrayUtil;
-import org.apache.lucene.util.RamUsageEstimator;
-import org.apache.lucene.util.Version;
-import org.elasticsearch.common.lucene.Lucene;
-
-import java.io.IOException;
-
-/**
- * This class converts alphabetic, numeric, and symbolic Unicode characters
- * which are not in the first 127 ASCII characters (the "Basic Latin" Unicode
- * block) into their ASCII equivalents, if one exists.
- *
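- * Characters from the following Unicode blocks are converted; however, only
- * those characters with reasonable ASCII alternatives are converted:
- *
- * <ul>
- * <li>C1 Controls and Latin-1 Supplement
- * <li>Latin Extended-A
- * <li>Latin Extended-B
- * <li>Latin Extended Additional
- * <li>Latin Extended-C
- * <li>Latin Extended-D
- * <li>IPA Extensions
- * <li>Phonetic Extensions
- * <li>Phonetic Extensions Supplement
- * <li>General Punctuation
- * <li>Superscripts and Subscripts
- * <li>Enclosed Alphanumerics
- * <li>Dingbats
- * <li>Supplemental Punctuation
- * <li>Alphabetic Presentation Forms
- * <li>Halfwidth and Fullwidth Forms
- * </ul>
- *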
- * See: http://en.wikipedia.org/wiki/Latin_characters_in_Unicode
- *
- * For example, 'à' will be replaced by 'a'.
- */
-public final class XASCIIFoldingFilter extends TokenFilter {
- // Version guard: this class is a copy of the LUCENE-5437 change. With
- // assertions enabled, class initialization fails as soon as Lucene.VERSION
- // is no longer LUCENE_46, as a reminder to remove this copy when upgrading.
- static {
- //Copied from LUCENE-5437. Remove when upgrading Lucene.
- assert Lucene.VERSION == Version.LUCENE_46: "LUCENE-5437";
- }
- // Term text of the current token; read in incrementToken() and overwritten
- // there with the folded characters.
- private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
- // Position increment; set to 0 when a captured original token is replayed.
- private final PositionIncrementAttribute posIncAttr = addAttribute(PositionIncrementAttribute.class);
- // Constructor flag: when true, foldToASCII(char[], int) may capture the
- // unfolded token so that it is emitted as well.
- private final boolean preserveOriginal;
- // Scratch buffer receiving folded characters; replaced by a larger array in
- // foldToASCII(char[], int) when it is smaller than 4 * length.
- private char[] output = new char[512];
- // Value returned by the last static foldToASCII call; used as the number of
- // chars copied from 'output' into termAtt.
- private int outputPos;
- // Token state captured by foldToASCII(char[], int); restored by the next
- // incrementToken() call. null when nothing is pending.
- private State state;
-
- /**
- * Creates a folding filter that does not preserve the original tokens;
- * delegates to the two-argument constructor with {@code preserveOriginal = false}.
- *
- * @param input
- * TokenStream to filter
- */
- public XASCIIFoldingFilter(TokenStream input)
- {
- this(input, false);
- }
-
- /**
- * Create a new {@link XASCIIFoldingFilter}.
- *
- * @param input
- * TokenStream to filter
- * @param preserveOriginal
- * should the original tokens be kept on the input stream with a 0 position increment
- * from the folded tokens?
- **/
- public XASCIIFoldingFilter(TokenStream input, boolean preserveOriginal)
- {
- super(input);
- this.preserveOriginal = preserveOriginal;
- }
-
- /**
- * Does the filter preserve the original tokens?
- *
- * @return the {@code preserveOriginal} flag passed to the constructor
- */
- public boolean isPreserveOriginal() {
- return preserveOriginal;
- }
-
- @Override
- public boolean incrementToken() throws IOException {
- if (state != null) {
- assert preserveOriginal : "state should only be captured if preserveOriginal is true";
- restoreState(state);
- posIncAttr.setPositionIncrement(0);
- state = null;
- return true;
- }
- if (input.incrementToken()) {
- final char[] buffer = termAtt.buffer();
- final int length = termAtt.length();
-
- // If no characters actually require rewriting then we
- // just return token as-is:
- for(int i = 0 ; i < length ; ++i) {
- final char c = buffer[i];
- if (c >= '\u0080')
- {
- foldToASCII(buffer, length);
- termAtt.copyBuffer(output, 0, outputPos);
- break;
- }
- }
- return true;
- } else {
- return false;
- }
- }
-
- /**
- * Resets the filter via {@code super.reset()} and drops any pending captured state.
- */
- @Override
- public void reset() throws IOException {
- super.reset();
- // Otherwise the next incrementToken() would replay a stale captured token.
- state = null;
- }
-
- /**
-  * Converts characters above ASCII to their ASCII equivalents. For example,
-  * accents are removed from accented characters. The folded characters are
-  * left in the internal {@code output} buffer and their count in
-  * {@code outputPos}; {@code input} itself is not modified.
-  * <p>
-  * When {@code preserveOriginal} is set, the current token state is captured
-  * for later replay, but only if folding actually changed the characters.
-  * Capturing unconditionally would make a token whose non-ASCII characters
-  * have no ASCII equivalent be emitted twice with identical text.
-  *
-  * @param input The string to fold
-  * @param length The number of characters in the input string
-  */
- public void foldToASCII(char[] input, int length)
- {
-   // Worst-case length required:
-   final int maxSizeNeeded = 4 * length;
-   if (output.length < maxSizeNeeded) {
-     output = new char[ArrayUtil.oversize(maxSizeNeeded, RamUsageEstimator.NUM_BYTES_CHAR)];
-   }
-
-   outputPos = foldToASCII(input, 0, output, 0, length);
-
-   // Capture after folding (so the result can be compared) but before the
-   // caller overwrites the term attribute with the folded form.
-   if (preserveOriginal && needToPreserve(input, length)) {
-     state = captureState();
-   }
- }
-
- /**
-  * Reports whether the folded result in {@code output} differs from the
-  * first {@code inputLength} characters of {@code input}.
-  */
- private boolean needToPreserve(char[] input, int inputLength) {
-   if (inputLength != outputPos) {
-     return true;
-   }
-   for (int i = 0; i < inputLength; i++) {
-     if (input[i] != output[i]) {
-       return true;
-     }
-   }
-   return false;
- }
-
- /**
- * Converts characters above ASCII to their ASCII equivalents. For example,
- * accents are removed from accented characters.
- * @param input The characters to fold
- * @param inputPos Index of the first character to fold
- * @param output The result of the folding. Should be of size >= {@code length * 4}.
- * @param outputPos Index of output where to put the result of the folding
- * @param length The number of characters to fold
- * @return length of output
- * @lucene.internal
- */
- public static final int foldToASCII(char input[], int inputPos, char output[], int outputPos, int length)
- {
- final int end = inputPos + length;
- for (int pos = inputPos; pos < end ; ++pos) {
- final char c = input[pos];
-
- // Quick test: if it's not in range then just keep current character
- if (c < '\u0080') {
- output[outputPos++] = c;
- } else {
- switch (c) {
- case '\u00C0': // À [LATIN CAPITAL LETTER A WITH GRAVE]
- case '\u00C1': // Á [LATIN CAPITAL LETTER A WITH ACUTE]
- case '\u00C2': // Â [LATIN CAPITAL LETTER A WITH CIRCUMFLEX]
- case '\u00C3': // Ã [LATIN CAPITAL LETTER A WITH TILDE]
- case '\u00C4': // Ä [LATIN CAPITAL LETTER A WITH DIAERESIS]
- case '\u00C5': // Å [LATIN CAPITAL LETTER A WITH RING ABOVE]
- case '\u0100': // Ā [LATIN CAPITAL LETTER A WITH MACRON]
- case '\u0102': // Ă [LATIN CAPITAL LETTER A WITH BREVE]
- case '\u0104': // Ą [LATIN CAPITAL LETTER A WITH OGONEK]
- case '\u018F': // Ə http://en.wikipedia.org/wiki/Schwa [LATIN CAPITAL LETTER SCHWA]
- case '\u01CD': // Ǎ [LATIN CAPITAL LETTER A WITH CARON]
- case '\u01DE': // Ǟ [LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON]
- case '\u01E0': // Ǡ [LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON]
- case '\u01FA': // Ǻ [LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE]
- case '\u0200': // Ȁ [LATIN CAPITAL LETTER A WITH DOUBLE GRAVE]
- case '\u0202': // Ȃ [LATIN CAPITAL LETTER A WITH INVERTED BREVE]
- case '\u0226': // Ȧ [LATIN CAPITAL LETTER A WITH DOT ABOVE]
- case '\u023A': // Ⱥ [LATIN CAPITAL LETTER A WITH STROKE]
- case '\u1D00': // ᴀ [LATIN LETTER SMALL CAPITAL A]
- case '\u1E00': // Ḁ [LATIN CAPITAL LETTER A WITH RING BELOW]
- case '\u1EA0': // Ạ [LATIN CAPITAL LETTER A WITH DOT BELOW]
- case '\u1EA2': // Ả [LATIN CAPITAL LETTER A WITH HOOK ABOVE]
- case '\u1EA4': // Ấ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE]
- case '\u1EA6': // Ầ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE]
- case '\u1EA8': // Ẩ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE]
- case '\u1EAA': // Ẫ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE]
- case '\u1EAC': // Ậ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW]
- case '\u1EAE': // Ắ [LATIN CAPITAL LETTER A WITH BREVE AND ACUTE]
- case '\u1EB0': // Ằ [LATIN CAPITAL LETTER A WITH BREVE AND GRAVE]
- case '\u1EB2': // Ẳ [LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE]
- case '\u1EB4': // Ẵ [LATIN CAPITAL LETTER A WITH BREVE AND TILDE]
- case '\u1EB6': // Ặ [LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW]
- case '\u24B6': // Ⓐ [CIRCLED LATIN CAPITAL LETTER A]
- case '\uFF21': // A [FULLWIDTH LATIN CAPITAL LETTER A]
- output[outputPos++] = 'A';
- break;
- case '\u00E0': // à [LATIN SMALL LETTER A WITH GRAVE]
- case '\u00E1': // á [LATIN SMALL LETTER A WITH ACUTE]
- case '\u00E2': // â [LATIN SMALL LETTER A WITH CIRCUMFLEX]
- case '\u00E3': // ã [LATIN SMALL LETTER A WITH TILDE]
- case '\u00E4': // ä [LATIN SMALL LETTER A WITH DIAERESIS]
- case '\u00E5': // å [LATIN SMALL LETTER A WITH RING ABOVE]
- case '\u0101': // ā [LATIN SMALL LETTER A WITH MACRON]
- case '\u0103': // ă [LATIN SMALL LETTER A WITH BREVE]
- case '\u0105': // ą [LATIN SMALL LETTER A WITH OGONEK]
- case '\u01CE': // ǎ [LATIN SMALL LETTER A WITH CARON]
- case '\u01DF': // ǟ [LATIN SMALL LETTER A WITH DIAERESIS AND MACRON]
- case '\u01E1': // ǡ [LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON]
- case '\u01FB': // ǻ [LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE]
- case '\u0201': // ȁ [LATIN SMALL LETTER A WITH DOUBLE GRAVE]
- case '\u0203': // ȃ [LATIN SMALL LETTER A WITH INVERTED BREVE]
- case '\u0227': // ȧ [LATIN SMALL LETTER A WITH DOT ABOVE]
- case '\u0250': // ɐ [LATIN SMALL LETTER TURNED A]
- case '\u0259': // ə [LATIN SMALL LETTER SCHWA]
- case '\u025A': // ɚ [LATIN SMALL LETTER SCHWA WITH HOOK]
- case '\u1D8F': // ᶏ [LATIN SMALL LETTER A WITH RETROFLEX HOOK]
- case '\u1D95': // ᶕ [LATIN SMALL LETTER SCHWA WITH RETROFLEX HOOK]
- case '\u1E01': // ạ [LATIN SMALL LETTER A WITH RING BELOW]
- case '\u1E9A': // ả [LATIN SMALL LETTER A WITH RIGHT HALF RING]
- case '\u1EA1': // ạ [LATIN SMALL LETTER A WITH DOT BELOW]
- case '\u1EA3': // ả [LATIN SMALL LETTER A WITH HOOK ABOVE]
- case '\u1EA5': // ấ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE]
- case '\u1EA7': // ầ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE]
- case '\u1EA9': // ẩ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE]
- case '\u1EAB': // ẫ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE]
- case '\u1EAD': // ậ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW]
- case '\u1EAF': // ắ [LATIN SMALL LETTER A WITH BREVE AND ACUTE]
- case '\u1EB1': // ằ [LATIN SMALL LETTER A WITH BREVE AND GRAVE]
- case '\u1EB3': // ẳ [LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE]
- case '\u1EB5': // ẵ [LATIN SMALL LETTER A WITH BREVE AND TILDE]
- case '\u1EB7': // ặ [LATIN SMALL LETTER A WITH BREVE AND DOT BELOW]
- case '\u2090': // ₐ [LATIN SUBSCRIPT SMALL LETTER A]
- case '\u2094': // ₔ [LATIN SUBSCRIPT SMALL LETTER SCHWA]
- case '\u24D0': // ⓐ [CIRCLED LATIN SMALL LETTER A]
- case '\u2C65': // ⱥ [LATIN SMALL LETTER A WITH STROKE]
- case '\u2C6F': // Ɐ [LATIN CAPITAL LETTER TURNED A]
- case '\uFF41': // a [FULLWIDTH LATIN SMALL LETTER A]
- output[outputPos++] = 'a';
- break;
- case '\uA732': // Ꜳ [LATIN CAPITAL LETTER AA]
- output[outputPos++] = 'A';
- output[outputPos++] = 'A';
- break;
- case '\u00C6': // Æ [LATIN CAPITAL LETTER AE]
- case '\u01E2': // Ǣ [LATIN CAPITAL LETTER AE WITH MACRON]
- case '\u01FC': // Ǽ [LATIN CAPITAL LETTER AE WITH ACUTE]
- case '\u1D01': // ᴁ [LATIN LETTER SMALL CAPITAL AE]
- output[outputPos++] = 'A';
- output[outputPos++] = 'E';
- break;
- case '\uA734': // Ꜵ [LATIN CAPITAL LETTER AO]
- output[outputPos++] = 'A';
- output[outputPos++] = 'O';
- break;
- case '\uA736': // Ꜷ [LATIN CAPITAL LETTER AU]
- output[outputPos++] = 'A';
- output[outputPos++] = 'U';
- break;
- case '\uA738': // Ꜹ [LATIN CAPITAL LETTER AV]
- case '\uA73A': // Ꜻ [LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR]
- output[outputPos++] = 'A';
- output[outputPos++] = 'V';
- break;
- case '\uA73C': // Ꜽ [LATIN CAPITAL LETTER AY]
- output[outputPos++] = 'A';
- output[outputPos++] = 'Y';
- break;
- case '\u249C': // ⒜ [PARENTHESIZED LATIN SMALL LETTER A]
- output[outputPos++] = '(';
- output[outputPos++] = 'a';
- output[outputPos++] = ')';
- break;
- case '\uA733': // ꜳ [LATIN SMALL LETTER AA]
- output[outputPos++] = 'a';
- output[outputPos++] = 'a';
- break;
- case '\u00E6': // æ [LATIN SMALL LETTER AE]
- case '\u01E3': // ǣ [LATIN SMALL LETTER AE WITH MACRON]
- case '\u01FD': // ǽ [LATIN SMALL LETTER AE WITH ACUTE]
- case '\u1D02': // ᴂ [LATIN SMALL LETTER TURNED AE]
- output[outputPos++] = 'a';
- output[outputPos++] = 'e';
- break;
- case '\uA735': // ꜵ [LATIN SMALL LETTER AO]
- output[outputPos++] = 'a';
- output[outputPos++] = 'o';
- break;
- case '\uA737': // ꜷ [LATIN SMALL LETTER AU]
- output[outputPos++] = 'a';
- output[outputPos++] = 'u';
- break;
- case '\uA739': // ꜹ [LATIN SMALL LETTER AV]
- case '\uA73B': // ꜻ [LATIN SMALL LETTER AV WITH HORIZONTAL BAR]
- output[outputPos++] = 'a';
- output[outputPos++] = 'v';
- break;
- case '\uA73D': // ꜽ [LATIN SMALL LETTER AY]
- output[outputPos++] = 'a';
- output[outputPos++] = 'y';
- break;
- case '\u0181': // Ɓ [LATIN CAPITAL LETTER B WITH HOOK]
- case '\u0182': // Ƃ [LATIN CAPITAL LETTER B WITH TOPBAR]
- case '\u0243': // Ƀ [LATIN CAPITAL LETTER B WITH STROKE]
- case '\u0299': // ʙ [LATIN LETTER SMALL CAPITAL B]
- case '\u1D03': // ᴃ [LATIN LETTER SMALL CAPITAL BARRED B]
- case '\u1E02': // Ḃ [LATIN CAPITAL LETTER B WITH DOT ABOVE]
- case '\u1E04': // Ḅ [LATIN CAPITAL LETTER B WITH DOT BELOW]
- case '\u1E06': // Ḇ [LATIN CAPITAL LETTER B WITH LINE BELOW]
- case '\u24B7': // Ⓑ [CIRCLED LATIN CAPITAL LETTER B]
- case '\uFF22': // B [FULLWIDTH LATIN CAPITAL LETTER B]
- output[outputPos++] = 'B';
- break;
- case '\u0180': // ƀ [LATIN SMALL LETTER B WITH STROKE]
- case '\u0183': // ƃ [LATIN SMALL LETTER B WITH TOPBAR]
- case '\u0253': // ɓ [LATIN SMALL LETTER B WITH HOOK]
- case '\u1D6C': // ᵬ [LATIN SMALL LETTER B WITH MIDDLE TILDE]
- case '\u1D80': // ᶀ [LATIN SMALL LETTER B WITH PALATAL HOOK]
- case '\u1E03': // ḃ [LATIN SMALL LETTER B WITH DOT ABOVE]
- case '\u1E05': // ḅ [LATIN SMALL LETTER B WITH DOT BELOW]
- case '\u1E07': // ḇ [LATIN SMALL LETTER B WITH LINE BELOW]
- case '\u24D1': // ⓑ [CIRCLED LATIN SMALL LETTER B]
- case '\uFF42': // b [FULLWIDTH LATIN SMALL LETTER B]
- output[outputPos++] = 'b';
- break;
- case '\u249D': // ⒝ [PARENTHESIZED LATIN SMALL LETTER B]
- output[outputPos++] = '(';
- output[outputPos++] = 'b';
- output[outputPos++] = ')';
- break;
- case '\u00C7': // Ç [LATIN CAPITAL LETTER C WITH CEDILLA]
- case '\u0106': // Ć [LATIN CAPITAL LETTER C WITH ACUTE]
- case '\u0108': // Ĉ [LATIN CAPITAL LETTER C WITH CIRCUMFLEX]
- case '\u010A': // Ċ [LATIN CAPITAL LETTER C WITH DOT ABOVE]
- case '\u010C': // Č [LATIN CAPITAL LETTER C WITH CARON]
- case '\u0187': // Ƈ [LATIN CAPITAL LETTER C WITH HOOK]
- case '\u023B': // Ȼ [LATIN CAPITAL LETTER C WITH STROKE]
- case '\u0297': // ʗ [LATIN LETTER STRETCHED C]
- case '\u1D04': // ᴄ [LATIN LETTER SMALL CAPITAL C]
- case '\u1E08': // Ḉ [LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE]
- case '\u24B8': // Ⓒ [CIRCLED LATIN CAPITAL LETTER C]
- case '\uFF23': // C [FULLWIDTH LATIN CAPITAL LETTER C]
- output[outputPos++] = 'C';
- break;
- case '\u00E7': // ç [LATIN SMALL LETTER C WITH CEDILLA]
- case '\u0107': // ć [LATIN SMALL LETTER C WITH ACUTE]
- case '\u0109': // ĉ [LATIN SMALL LETTER C WITH CIRCUMFLEX]
- case '\u010B': // ċ [LATIN SMALL LETTER C WITH DOT ABOVE]
- case '\u010D': // č [LATIN SMALL LETTER C WITH CARON]
- case '\u0188': // ƈ [LATIN SMALL LETTER C WITH HOOK]
- case '\u023C': // ȼ [LATIN SMALL LETTER C WITH STROKE]
- case '\u0255': // ɕ [LATIN SMALL LETTER C WITH CURL]
- case '\u1E09': // ḉ [LATIN SMALL LETTER C WITH CEDILLA AND ACUTE]
- case '\u2184': // ↄ [LATIN SMALL LETTER REVERSED C]
- case '\u24D2': // ⓒ [CIRCLED LATIN SMALL LETTER C]
- case '\uA73E': // Ꜿ [LATIN CAPITAL LETTER REVERSED C WITH DOT]
- case '\uA73F': // ꜿ [LATIN SMALL LETTER REVERSED C WITH DOT]
- case '\uFF43': // c [FULLWIDTH LATIN SMALL LETTER C]
- output[outputPos++] = 'c';
- break;
- case '\u249E': // ⒞ [PARENTHESIZED LATIN SMALL LETTER C]
- output[outputPos++] = '(';
- output[outputPos++] = 'c';
- output[outputPos++] = ')';
- break;
- case '\u00D0': // Ð [LATIN CAPITAL LETTER ETH]
- case '\u010E': // Ď [LATIN CAPITAL LETTER D WITH CARON]
- case '\u0110': // Đ [LATIN CAPITAL LETTER D WITH STROKE]
- case '\u0189': // Ɖ [LATIN CAPITAL LETTER AFRICAN D]
- case '\u018A': // Ɗ [LATIN CAPITAL LETTER D WITH HOOK]
- case '\u018B': // Ƌ [LATIN CAPITAL LETTER D WITH TOPBAR]
- case '\u1D05': // ᴅ [LATIN LETTER SMALL CAPITAL D]
- case '\u1D06': // ᴆ [LATIN LETTER SMALL CAPITAL ETH]
- case '\u1E0A': // Ḋ [LATIN CAPITAL LETTER D WITH DOT ABOVE]
- case '\u1E0C': // Ḍ [LATIN CAPITAL LETTER D WITH DOT BELOW]
- case '\u1E0E': // Ḏ [LATIN CAPITAL LETTER D WITH LINE BELOW]
- case '\u1E10': // Ḑ [LATIN CAPITAL LETTER D WITH CEDILLA]
- case '\u1E12': // Ḓ [LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW]
- case '\u24B9': // Ⓓ [CIRCLED LATIN CAPITAL LETTER D]
- case '\uA779': // Ꝺ [LATIN CAPITAL LETTER INSULAR D]
- case '\uFF24': // D [FULLWIDTH LATIN CAPITAL LETTER D]
- output[outputPos++] = 'D';
- break;
- case '\u00F0': // ð [LATIN SMALL LETTER ETH]
- case '\u010F': // ď [LATIN SMALL LETTER D WITH CARON]
- case '\u0111': // đ [LATIN SMALL LETTER D WITH STROKE]
- case '\u018C': // ƌ [LATIN SMALL LETTER D WITH TOPBAR]
- case '\u0221': // ȡ [LATIN SMALL LETTER D WITH CURL]
- case '\u0256': // ɖ [LATIN SMALL LETTER D WITH TAIL]
- case '\u0257': // ɗ [LATIN SMALL LETTER D WITH HOOK]
- case '\u1D6D': // ᵭ [LATIN SMALL LETTER D WITH MIDDLE TILDE]
- case '\u1D81': // ᶁ [LATIN SMALL LETTER D WITH PALATAL HOOK]
- case '\u1D91': // ᶑ [LATIN SMALL LETTER D WITH HOOK AND TAIL]
- case '\u1E0B': // ḋ [LATIN SMALL LETTER D WITH DOT ABOVE]
- case '\u1E0D': // ḍ [LATIN SMALL LETTER D WITH DOT BELOW]
- case '\u1E0F': // ḏ [LATIN SMALL LETTER D WITH LINE BELOW]
- case '\u1E11': // ḑ [LATIN SMALL LETTER D WITH CEDILLA]
- case '\u1E13': // ḓ [LATIN SMALL LETTER D WITH CIRCUMFLEX BELOW]
- case '\u24D3': // ⓓ [CIRCLED LATIN SMALL LETTER D]
- case '\uA77A': // ꝺ [LATIN SMALL LETTER INSULAR D]
- case '\uFF44': // d [FULLWIDTH LATIN SMALL LETTER D]
- output[outputPos++] = 'd';
- break;
- case '\u01C4': // DŽ [LATIN CAPITAL LETTER DZ WITH CARON]
- case '\u01F1': // DZ [LATIN CAPITAL LETTER DZ]
- output[outputPos++] = 'D';
- output[outputPos++] = 'Z';
- break;
- case '\u01C5': // Dž [LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON]
- case '\u01F2': // Dz [LATIN CAPITAL LETTER D WITH SMALL LETTER Z]
- output[outputPos++] = 'D';
- output[outputPos++] = 'z';
- break;
- case '\u249F': // ⒟ [PARENTHESIZED LATIN SMALL LETTER D]
- output[outputPos++] = '(';
- output[outputPos++] = 'd';
- output[outputPos++] = ')';
- break;
- case '\u0238': // ȸ [LATIN SMALL LETTER DB DIGRAPH]
- output[outputPos++] = 'd';
- output[outputPos++] = 'b';
- break;
- case '\u01C6': // dž [LATIN SMALL LETTER DZ WITH CARON]
- case '\u01F3': // dz [LATIN SMALL LETTER DZ]
- case '\u02A3': // ʣ [LATIN SMALL LETTER DZ DIGRAPH]
- case '\u02A5': // ʥ [LATIN SMALL LETTER DZ DIGRAPH WITH CURL]
- output[outputPos++] = 'd';
- output[outputPos++] = 'z';
- break;
- case '\u00C8': // È [LATIN CAPITAL LETTER E WITH GRAVE]
- case '\u00C9': // É [LATIN CAPITAL LETTER E WITH ACUTE]
- case '\u00CA': // Ê [LATIN CAPITAL LETTER E WITH CIRCUMFLEX]
- case '\u00CB': // Ë [LATIN CAPITAL LETTER E WITH DIAERESIS]
- case '\u0112': // Ē [LATIN CAPITAL LETTER E WITH MACRON]
- case '\u0114': // Ĕ [LATIN CAPITAL LETTER E WITH BREVE]
- case '\u0116': // Ė [LATIN CAPITAL LETTER E WITH DOT ABOVE]
- case '\u0118': // Ę [LATIN CAPITAL LETTER E WITH OGONEK]
- case '\u011A': // Ě [LATIN CAPITAL LETTER E WITH CARON]
- case '\u018E': // Ǝ [LATIN CAPITAL LETTER REVERSED E]
- case '\u0190': // Ɛ [LATIN CAPITAL LETTER OPEN E]
- case '\u0204': // Ȅ [LATIN CAPITAL LETTER E WITH DOUBLE GRAVE]
- case '\u0206': // Ȇ [LATIN CAPITAL LETTER E WITH INVERTED BREVE]
- case '\u0228': // Ȩ [LATIN CAPITAL LETTER E WITH CEDILLA]
- case '\u0246': // Ɇ [LATIN CAPITAL LETTER E WITH STROKE]
- case '\u1D07': // ᴇ [LATIN LETTER SMALL CAPITAL E]
- case '\u1E14': // Ḕ [LATIN CAPITAL LETTER E WITH MACRON AND GRAVE]
- case '\u1E16': // Ḗ [LATIN CAPITAL LETTER E WITH MACRON AND ACUTE]
- case '\u1E18': // Ḙ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW]
- case '\u1E1A': // Ḛ [LATIN CAPITAL LETTER E WITH TILDE BELOW]
- case '\u1E1C': // Ḝ [LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE]
- case '\u1EB8': // Ẹ [LATIN CAPITAL LETTER E WITH DOT BELOW]
- case '\u1EBA': // Ẻ [LATIN CAPITAL LETTER E WITH HOOK ABOVE]
- case '\u1EBC': // Ẽ [LATIN CAPITAL LETTER E WITH TILDE]
- case '\u1EBE': // Ế [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE]
- case '\u1EC0': // Ề [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE]
- case '\u1EC2': // Ể [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE]
- case '\u1EC4': // Ễ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE]
- case '\u1EC6': // Ệ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW]
- case '\u24BA': // Ⓔ [CIRCLED LATIN CAPITAL LETTER E]
- case '\u2C7B': // ⱻ [LATIN LETTER SMALL CAPITAL TURNED E]
- case '\uFF25': // E [FULLWIDTH LATIN CAPITAL LETTER E]
- output[outputPos++] = 'E';
- break;
- case '\u00E8': // è [LATIN SMALL LETTER E WITH GRAVE]
- case '\u00E9': // é [LATIN SMALL LETTER E WITH ACUTE]
- case '\u00EA': // ê [LATIN SMALL LETTER E WITH CIRCUMFLEX]
- case '\u00EB': // ë [LATIN SMALL LETTER E WITH DIAERESIS]
- case '\u0113': // ē [LATIN SMALL LETTER E WITH MACRON]
- case '\u0115': // ĕ [LATIN SMALL LETTER E WITH BREVE]
- case '\u0117': // ė [LATIN SMALL LETTER E WITH DOT ABOVE]
- case '\u0119': // ę [LATIN SMALL LETTER E WITH OGONEK]
- case '\u011B': // ě [LATIN SMALL LETTER E WITH CARON]
- case '\u01DD': // ǝ [LATIN SMALL LETTER TURNED E]
- case '\u0205': // ȅ [LATIN SMALL LETTER E WITH DOUBLE GRAVE]
- case '\u0207': // ȇ [LATIN SMALL LETTER E WITH INVERTED BREVE]
- case '\u0229': // ȩ [LATIN SMALL LETTER E WITH CEDILLA]
- case '\u0247': // ɇ [LATIN SMALL LETTER E WITH STROKE]
- case '\u0258': // ɘ [LATIN SMALL LETTER REVERSED E]
- case '\u025B': // ɛ [LATIN SMALL LETTER OPEN E]
- case '\u025C': // ɜ [LATIN SMALL LETTER REVERSED OPEN E]
- case '\u025D': // ɝ [LATIN SMALL LETTER REVERSED OPEN E WITH HOOK]
- case '\u025E': // ɞ [LATIN SMALL LETTER CLOSED REVERSED OPEN E]
- case '\u029A': // ʚ [LATIN SMALL LETTER CLOSED OPEN E]
- case '\u1D08': // ᴈ [LATIN SMALL LETTER TURNED OPEN E]
- case '\u1D92': // ᶒ [LATIN SMALL LETTER E WITH RETROFLEX HOOK]
- case '\u1D93': // ᶓ [LATIN SMALL LETTER OPEN E WITH RETROFLEX HOOK]
- case '\u1D94': // ᶔ [LATIN SMALL LETTER REVERSED OPEN E WITH RETROFLEX HOOK]
- case '\u1E15': // ḕ [LATIN SMALL LETTER E WITH MACRON AND GRAVE]
- case '\u1E17': // ḗ [LATIN SMALL LETTER E WITH MACRON AND ACUTE]
- case '\u1E19': // ḙ [LATIN SMALL LETTER E WITH CIRCUMFLEX BELOW]
- case '\u1E1B': // ḛ [LATIN SMALL LETTER E WITH TILDE BELOW]
- case '\u1E1D': // ḝ [LATIN SMALL LETTER E WITH CEDILLA AND BREVE]
- case '\u1EB9': // ẹ [LATIN SMALL LETTER E WITH DOT BELOW]
- case '\u1EBB': // ẻ [LATIN SMALL LETTER E WITH HOOK ABOVE]
- case '\u1EBD': // ẽ [LATIN SMALL LETTER E WITH TILDE]
- case '\u1EBF': // ế [LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE]
- case '\u1EC1': // ề [LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE]
- case '\u1EC3': // ể [LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE]
- case '\u1EC5': // ễ [LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE]
- case '\u1EC7': // ệ [LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW]
- case '\u2091': // ₑ [LATIN SUBSCRIPT SMALL LETTER E]
- case '\u24D4': // ⓔ [CIRCLED LATIN SMALL LETTER E]
- case '\u2C78': // ⱸ [LATIN SMALL LETTER E WITH NOTCH]
- case '\uFF45': // e [FULLWIDTH LATIN SMALL LETTER E]
- output[outputPos++] = 'e';
- break;
- case '\u24A0': // ⒠ [PARENTHESIZED LATIN SMALL LETTER E]
- output[outputPos++] = '(';
- output[outputPos++] = 'e';
- output[outputPos++] = ')';
- break;
- case '\u0191': // Ƒ [LATIN CAPITAL LETTER F WITH HOOK]
- case '\u1E1E': // Ḟ [LATIN CAPITAL LETTER F WITH DOT ABOVE]
- case '\u24BB': // Ⓕ [CIRCLED LATIN CAPITAL LETTER F]
- case '\uA730': // ꜰ [LATIN LETTER SMALL CAPITAL F]
- case '\uA77B': // Ꝼ [LATIN CAPITAL LETTER INSULAR F]
- case '\uA7FB': // ꟻ [LATIN EPIGRAPHIC LETTER REVERSED F]
- case '\uFF26': // F [FULLWIDTH LATIN CAPITAL LETTER F]
- output[outputPos++] = 'F';
- break;
- case '\u0192': // ƒ [LATIN SMALL LETTER F WITH HOOK]
- case '\u1D6E': // ᵮ [LATIN SMALL LETTER F WITH MIDDLE TILDE]
- case '\u1D82': // ᶂ [LATIN SMALL LETTER F WITH PALATAL HOOK]
- case '\u1E1F': // ḟ [LATIN SMALL LETTER F WITH DOT ABOVE]
- case '\u1E9B': // ẛ [LATIN SMALL LETTER LONG S WITH DOT ABOVE]
- case '\u24D5': // ⓕ [CIRCLED LATIN SMALL LETTER F]
- case '\uA77C': // ꝼ [LATIN SMALL LETTER INSULAR F]
- case '\uFF46': // f [FULLWIDTH LATIN SMALL LETTER F]
- output[outputPos++] = 'f';
- break;
- case '\u24A1': // ⒡ [PARENTHESIZED LATIN SMALL LETTER F]
- output[outputPos++] = '(';
- output[outputPos++] = 'f';
- output[outputPos++] = ')';
- break;
- case '\uFB00': // ff [LATIN SMALL LIGATURE FF]
- output[outputPos++] = 'f';
- output[outputPos++] = 'f';
- break;
- case '\uFB03': // ffi [LATIN SMALL LIGATURE FFI]
- output[outputPos++] = 'f';
- output[outputPos++] = 'f';
- output[outputPos++] = 'i';
- break;
- case '\uFB04': // ffl [LATIN SMALL LIGATURE FFL]
- output[outputPos++] = 'f';
- output[outputPos++] = 'f';
- output[outputPos++] = 'l';
- break;
- case '\uFB01': // fi [LATIN SMALL LIGATURE FI]
- output[outputPos++] = 'f';
- output[outputPos++] = 'i';
- break;
- case '\uFB02': // fl [LATIN SMALL LIGATURE FL]
- output[outputPos++] = 'f';
- output[outputPos++] = 'l';
- break;
- case '\u011C': // Ĝ [LATIN CAPITAL LETTER G WITH CIRCUMFLEX]
- case '\u011E': // Ğ [LATIN CAPITAL LETTER G WITH BREVE]
- case '\u0120': // Ġ [LATIN CAPITAL LETTER G WITH DOT ABOVE]
- case '\u0122': // Ģ [LATIN CAPITAL LETTER G WITH CEDILLA]
- case '\u0193': // Ɠ [LATIN CAPITAL LETTER G WITH HOOK]
- case '\u01E4': // Ǥ [LATIN CAPITAL LETTER G WITH STROKE]
- case '\u01E5': // ǥ [LATIN SMALL LETTER G WITH STROKE]
- case '\u01E6': // Ǧ [LATIN CAPITAL LETTER G WITH CARON]
- case '\u01E7': // ǧ [LATIN SMALL LETTER G WITH CARON]
- case '\u01F4': // Ǵ [LATIN CAPITAL LETTER G WITH ACUTE]
- case '\u0262': // ɢ [LATIN LETTER SMALL CAPITAL G]
- case '\u029B': // ʛ [LATIN LETTER SMALL CAPITAL G WITH HOOK]
- case '\u1E20': // Ḡ [LATIN CAPITAL LETTER G WITH MACRON]
- case '\u24BC': // Ⓖ [CIRCLED LATIN CAPITAL LETTER G]
- case '\uA77D': // Ᵹ [LATIN CAPITAL LETTER INSULAR G]
- case '\uA77E': // Ꝿ [LATIN CAPITAL LETTER TURNED INSULAR G]
- case '\uFF27': // G [FULLWIDTH LATIN CAPITAL LETTER G]
- output[outputPos++] = 'G';
- break;
- case '\u011D': // ĝ [LATIN SMALL LETTER G WITH CIRCUMFLEX]
- case '\u011F': // ğ [LATIN SMALL LETTER G WITH BREVE]
- case '\u0121': // ġ [LATIN SMALL LETTER G WITH DOT ABOVE]
- case '\u0123': // ģ [LATIN SMALL LETTER G WITH CEDILLA]
- case '\u01F5': // ǵ [LATIN SMALL LETTER G WITH ACUTE]
- case '\u0260': // ɠ [LATIN SMALL LETTER G WITH HOOK]
- case '\u0261': // ɡ [LATIN SMALL LETTER SCRIPT G]
- case '\u1D77': // ᵷ [LATIN SMALL LETTER TURNED G]
- case '\u1D79': // ᵹ [LATIN SMALL LETTER INSULAR G]
- case '\u1D83': // ᶃ [LATIN SMALL LETTER G WITH PALATAL HOOK]
- case '\u1E21': // ḡ [LATIN SMALL LETTER G WITH MACRON]
- case '\u24D6': // ⓖ [CIRCLED LATIN SMALL LETTER G]
- case '\uA77F': // ꝿ [LATIN SMALL LETTER TURNED INSULAR G]
- case '\uFF47': // g [FULLWIDTH LATIN SMALL LETTER G]
- output[outputPos++] = 'g';
- break;
- case '\u24A2': // ⒢ [PARENTHESIZED LATIN SMALL LETTER G]
- output[outputPos++] = '(';
- output[outputPos++] = 'g';
- output[outputPos++] = ')';
- break;
- case '\u0124': // Ĥ [LATIN CAPITAL LETTER H WITH CIRCUMFLEX]
- case '\u0126': // Ħ [LATIN CAPITAL LETTER H WITH STROKE]
- case '\u021E': // Ȟ [LATIN CAPITAL LETTER H WITH CARON]
- case '\u029C': // ʜ [LATIN LETTER SMALL CAPITAL H]
- case '\u1E22': // Ḣ [LATIN CAPITAL LETTER H WITH DOT ABOVE]
- case '\u1E24': // Ḥ [LATIN CAPITAL LETTER H WITH DOT BELOW]
- case '\u1E26': // Ḧ [LATIN CAPITAL LETTER H WITH DIAERESIS]
- case '\u1E28': // Ḩ [LATIN CAPITAL LETTER H WITH CEDILLA]
- case '\u1E2A': // Ḫ [LATIN CAPITAL LETTER H WITH BREVE BELOW]
- case '\u24BD': // Ⓗ [CIRCLED LATIN CAPITAL LETTER H]
- case '\u2C67': // Ⱨ [LATIN CAPITAL LETTER H WITH DESCENDER]
- case '\u2C75': // Ⱶ [LATIN CAPITAL LETTER HALF H]
- case '\uFF28': // H [FULLWIDTH LATIN CAPITAL LETTER H]
- output[outputPos++] = 'H';
- break;
- case '\u0125': // ĥ [LATIN SMALL LETTER H WITH CIRCUMFLEX]
- case '\u0127': // ħ [LATIN SMALL LETTER H WITH STROKE]
- case '\u021F': // ȟ [LATIN SMALL LETTER H WITH CARON]
- case '\u0265': // ɥ [LATIN SMALL LETTER TURNED H]
- case '\u0266': // ɦ [LATIN SMALL LETTER H WITH HOOK]
- case '\u02AE': // ʮ [LATIN SMALL LETTER TURNED H WITH FISHHOOK]
- case '\u02AF': // ʯ [LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL]
- case '\u1E23': // ḣ [LATIN SMALL LETTER H WITH DOT ABOVE]
- case '\u1E25': // ḥ [LATIN SMALL LETTER H WITH DOT BELOW]
- case '\u1E27': // ḧ [LATIN SMALL LETTER H WITH DIAERESIS]
- case '\u1E29': // ḩ [LATIN SMALL LETTER H WITH CEDILLA]
- case '\u1E2B': // ḫ [LATIN SMALL LETTER H WITH BREVE BELOW]
- case '\u1E96': // ẖ [LATIN SMALL LETTER H WITH LINE BELOW]
- case '\u24D7': // ⓗ [CIRCLED LATIN SMALL LETTER H]
- case '\u2C68': // ⱨ [LATIN SMALL LETTER H WITH DESCENDER]
- case '\u2C76': // ⱶ [LATIN SMALL LETTER HALF H]
- case '\uFF48': // h [FULLWIDTH LATIN SMALL LETTER H]
- output[outputPos++] = 'h';
- break;
- case '\u01F6': // Ƕ http://en.wikipedia.org/wiki/Hwair [LATIN CAPITAL LETTER HWAIR]
- output[outputPos++] = 'H';
- output[outputPos++] = 'V';
- break;
- case '\u24A3': // ⒣ [PARENTHESIZED LATIN SMALL LETTER H]
- output[outputPos++] = '(';
- output[outputPos++] = 'h';
- output[outputPos++] = ')';
- break;
- case '\u0195': // ƕ [LATIN SMALL LETTER HV]
- output[outputPos++] = 'h';
- output[outputPos++] = 'v';
- break;
- case '\u00CC': // Ì [LATIN CAPITAL LETTER I WITH GRAVE]
- case '\u00CD': // Í [LATIN CAPITAL LETTER I WITH ACUTE]
- case '\u00CE': // Î [LATIN CAPITAL LETTER I WITH CIRCUMFLEX]
- case '\u00CF': // Ï [LATIN CAPITAL LETTER I WITH DIAERESIS]
- case '\u0128': // Ĩ [LATIN CAPITAL LETTER I WITH TILDE]
- case '\u012A': // Ī [LATIN CAPITAL LETTER I WITH MACRON]
- case '\u012C': // Ĭ [LATIN CAPITAL LETTER I WITH BREVE]
- case '\u012E': // Į [LATIN CAPITAL LETTER I WITH OGONEK]
- case '\u0130': // İ [LATIN CAPITAL LETTER I WITH DOT ABOVE]
- case '\u0196': // Ɩ [LATIN CAPITAL LETTER IOTA]
- case '\u0197': // Ɨ [LATIN CAPITAL LETTER I WITH STROKE]
- case '\u01CF': // Ǐ [LATIN CAPITAL LETTER I WITH CARON]
- case '\u0208': // Ȉ [LATIN CAPITAL LETTER I WITH DOUBLE GRAVE]
- case '\u020A': // Ȋ [LATIN CAPITAL LETTER I WITH INVERTED BREVE]
- case '\u026A': // ɪ [LATIN LETTER SMALL CAPITAL I]
- case '\u1D7B': // ᵻ [LATIN SMALL CAPITAL LETTER I WITH STROKE]
- case '\u1E2C': // Ḭ [LATIN CAPITAL LETTER I WITH TILDE BELOW]
- case '\u1E2E': // Ḯ [LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE]
- case '\u1EC8': // Ỉ [LATIN CAPITAL LETTER I WITH HOOK ABOVE]
- case '\u1ECA': // Ị [LATIN CAPITAL LETTER I WITH DOT BELOW]
- case '\u24BE': // Ⓘ [CIRCLED LATIN CAPITAL LETTER I]
- case '\uA7FE': // ꟾ [LATIN EPIGRAPHIC LETTER I LONGA]
- case '\uFF29': // I [FULLWIDTH LATIN CAPITAL LETTER I]
- output[outputPos++] = 'I';
- break;
- case '\u00EC': // ì [LATIN SMALL LETTER I WITH GRAVE]
- case '\u00ED': // í [LATIN SMALL LETTER I WITH ACUTE]
- case '\u00EE': // î [LATIN SMALL LETTER I WITH CIRCUMFLEX]
- case '\u00EF': // ï [LATIN SMALL LETTER I WITH DIAERESIS]
- case '\u0129': // ĩ [LATIN SMALL LETTER I WITH TILDE]
- case '\u012B': // ī [LATIN SMALL LETTER I WITH MACRON]
- case '\u012D': // ĭ [LATIN SMALL LETTER I WITH BREVE]
- case '\u012F': // į [LATIN SMALL LETTER I WITH OGONEK]
- case '\u0131': // ı [LATIN SMALL LETTER DOTLESS I]
- case '\u01D0': // ǐ [LATIN SMALL LETTER I WITH CARON]
- case '\u0209': // ȉ [LATIN SMALL LETTER I WITH DOUBLE GRAVE]
- case '\u020B': // ȋ [LATIN SMALL LETTER I WITH INVERTED BREVE]
- case '\u0268': // ɨ [LATIN SMALL LETTER I WITH STROKE]
- case '\u1D09': // ᴉ [LATIN SMALL LETTER TURNED I]
- case '\u1D62': // ᵢ [LATIN SUBSCRIPT SMALL LETTER I]
- case '\u1D7C': // ᵼ [LATIN SMALL LETTER IOTA WITH STROKE]
- case '\u1D96': // ᶖ [LATIN SMALL LETTER I WITH RETROFLEX HOOK]
- case '\u1E2D': // ḭ [LATIN SMALL LETTER I WITH TILDE BELOW]
- case '\u1E2F': // ḯ [LATIN SMALL LETTER I WITH DIAERESIS AND ACUTE]
- case '\u1EC9': // ỉ [LATIN SMALL LETTER I WITH HOOK ABOVE]
- case '\u1ECB': // ị [LATIN SMALL LETTER I WITH DOT BELOW]
- case '\u2071': // ⁱ [SUPERSCRIPT LATIN SMALL LETTER I]
- case '\u24D8': // ⓘ [CIRCLED LATIN SMALL LETTER I]
- case '\uFF49': // i [FULLWIDTH LATIN SMALL LETTER I]
- output[outputPos++] = 'i';
- break;
- case '\u0132': // IJ [LATIN CAPITAL LIGATURE IJ]
- output[outputPos++] = 'I';
- output[outputPos++] = 'J';
- break;
- case '\u24A4': // ⒤ [PARENTHESIZED LATIN SMALL LETTER I]
- output[outputPos++] = '(';
- output[outputPos++] = 'i';
- output[outputPos++] = ')';
- break;
- case '\u0133': // ij [LATIN SMALL LIGATURE IJ]
- output[outputPos++] = 'i';
- output[outputPos++] = 'j';
- break;
- case '\u0134': // Ĵ [LATIN CAPITAL LETTER J WITH CIRCUMFLEX]
- case '\u0248': // Ɉ [LATIN CAPITAL LETTER J WITH STROKE]
- case '\u1D0A': // ᴊ [LATIN LETTER SMALL CAPITAL J]
- case '\u24BF': // Ⓙ [CIRCLED LATIN CAPITAL LETTER J]
- case '\uFF2A': // J [FULLWIDTH LATIN CAPITAL LETTER J]
- output[outputPos++] = 'J';
- break;
- case '\u0135': // ĵ [LATIN SMALL LETTER J WITH CIRCUMFLEX]
- case '\u01F0': // ǰ [LATIN SMALL LETTER J WITH CARON]
- case '\u0237': // ȷ [LATIN SMALL LETTER DOTLESS J]
- case '\u0249': // ɉ [LATIN SMALL LETTER J WITH STROKE]
- case '\u025F': // ɟ [LATIN SMALL LETTER DOTLESS J WITH STROKE]
- case '\u0284': // ʄ [LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK]
- case '\u029D': // ʝ [LATIN SMALL LETTER J WITH CROSSED-TAIL]
- case '\u24D9': // ⓙ [CIRCLED LATIN SMALL LETTER J]
- case '\u2C7C': // ⱼ [LATIN SUBSCRIPT SMALL LETTER J]
- case '\uFF4A': // j [FULLWIDTH LATIN SMALL LETTER J]
- output[outputPos++] = 'j';
- break;
- case '\u24A5': // ⒥ [PARENTHESIZED LATIN SMALL LETTER J]
- output[outputPos++] = '(';
- output[outputPos++] = 'j';
- output[outputPos++] = ')';
- break;
- case '\u0136': // Ķ [LATIN CAPITAL LETTER K WITH CEDILLA]
- case '\u0198': // Ƙ [LATIN CAPITAL LETTER K WITH HOOK]
- case '\u01E8': // Ǩ [LATIN CAPITAL LETTER K WITH CARON]
- case '\u1D0B': // ᴋ [LATIN LETTER SMALL CAPITAL K]
- case '\u1E30': // Ḱ [LATIN CAPITAL LETTER K WITH ACUTE]
- case '\u1E32': // Ḳ [LATIN CAPITAL LETTER K WITH DOT BELOW]
- case '\u1E34': // Ḵ [LATIN CAPITAL LETTER K WITH LINE BELOW]
- case '\u24C0': // Ⓚ [CIRCLED LATIN CAPITAL LETTER K]
- case '\u2C69': // Ⱪ [LATIN CAPITAL LETTER K WITH DESCENDER]
- case '\uA740': // Ꝁ [LATIN CAPITAL LETTER K WITH STROKE]
- case '\uA742': // Ꝃ [LATIN CAPITAL LETTER K WITH DIAGONAL STROKE]
- case '\uA744': // Ꝅ [LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE]
- case '\uFF2B': // K [FULLWIDTH LATIN CAPITAL LETTER K]
- output[outputPos++] = 'K';
- break;
- case '\u0137': // ķ [LATIN SMALL LETTER K WITH CEDILLA]
- case '\u0199': // ƙ [LATIN SMALL LETTER K WITH HOOK]
- case '\u01E9': // ǩ [LATIN SMALL LETTER K WITH CARON]
- case '\u029E': // ʞ [LATIN SMALL LETTER TURNED K]
- case '\u1D84': // ᶄ [LATIN SMALL LETTER K WITH PALATAL HOOK]
- case '\u1E31': // ḱ [LATIN SMALL LETTER K WITH ACUTE]
- case '\u1E33': // ḳ [LATIN SMALL LETTER K WITH DOT BELOW]
- case '\u1E35': // ḵ [LATIN SMALL LETTER K WITH LINE BELOW]
- case '\u24DA': // ⓚ [CIRCLED LATIN SMALL LETTER K]
- case '\u2C6A': // ⱪ [LATIN SMALL LETTER K WITH DESCENDER]
- case '\uA741': // ꝁ [LATIN SMALL LETTER K WITH STROKE]
- case '\uA743': // ꝃ [LATIN SMALL LETTER K WITH DIAGONAL STROKE]
- case '\uA745': // ꝅ [LATIN SMALL LETTER K WITH STROKE AND DIAGONAL STROKE]
- case '\uFF4B': // k [FULLWIDTH LATIN SMALL LETTER K]
- output[outputPos++] = 'k';
- break;
- case '\u24A6': // ⒦ [PARENTHESIZED LATIN SMALL LETTER K]
- output[outputPos++] = '(';
- output[outputPos++] = 'k';
- output[outputPos++] = ')';
- break;
- case '\u0139': // Ĺ [LATIN CAPITAL LETTER L WITH ACUTE]
- case '\u013B': // Ļ [LATIN CAPITAL LETTER L WITH CEDILLA]
- case '\u013D': // Ľ [LATIN CAPITAL LETTER L WITH CARON]
- case '\u013F': // Ŀ [LATIN CAPITAL LETTER L WITH MIDDLE DOT]
- case '\u0141': // Ł [LATIN CAPITAL LETTER L WITH STROKE]
- case '\u023D': // Ƚ [LATIN CAPITAL LETTER L WITH BAR]
- case '\u029F': // ʟ [LATIN LETTER SMALL CAPITAL L]
- case '\u1D0C': // ᴌ [LATIN LETTER SMALL CAPITAL L WITH STROKE]
- case '\u1E36': // Ḷ [LATIN CAPITAL LETTER L WITH DOT BELOW]
- case '\u1E38': // Ḹ [LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON]
- case '\u1E3A': // Ḻ [LATIN CAPITAL LETTER L WITH LINE BELOW]
- case '\u1E3C': // Ḽ [LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW]
- case '\u24C1': // Ⓛ [CIRCLED LATIN CAPITAL LETTER L]
- case '\u2C60': // Ⱡ [LATIN CAPITAL LETTER L WITH DOUBLE BAR]
- case '\u2C62': // Ɫ [LATIN CAPITAL LETTER L WITH MIDDLE TILDE]
- case '\uA746': // Ꝇ [LATIN CAPITAL LETTER BROKEN L]
- case '\uA748': // Ꝉ [LATIN CAPITAL LETTER L WITH HIGH STROKE]
- case '\uA780': // Ꞁ [LATIN CAPITAL LETTER TURNED L]
- case '\uFF2C': // L [FULLWIDTH LATIN CAPITAL LETTER L]
- output[outputPos++] = 'L';
- break;
- case '\u013A': // ĺ [LATIN SMALL LETTER L WITH ACUTE]
- case '\u013C': // ļ [LATIN SMALL LETTER L WITH CEDILLA]
- case '\u013E': // ľ [LATIN SMALL LETTER L WITH CARON]
- case '\u0140': // ŀ [LATIN SMALL LETTER L WITH MIDDLE DOT]
- case '\u0142': // ł [LATIN SMALL LETTER L WITH STROKE]
- case '\u019A': // ƚ [LATIN SMALL LETTER L WITH BAR]
- case '\u0234': // ȴ [LATIN SMALL LETTER L WITH CURL]
- case '\u026B': // ɫ [LATIN SMALL LETTER L WITH MIDDLE TILDE]
- case '\u026C': // ɬ [LATIN SMALL LETTER L WITH BELT]
- case '\u026D': // ɭ [LATIN SMALL LETTER L WITH RETROFLEX HOOK]
- case '\u1D85': // ᶅ [LATIN SMALL LETTER L WITH PALATAL HOOK]
- case '\u1E37': // ḷ [LATIN SMALL LETTER L WITH DOT BELOW]
- case '\u1E39': // ḹ [LATIN SMALL LETTER L WITH DOT BELOW AND MACRON]
- case '\u1E3B': // ḻ [LATIN SMALL LETTER L WITH LINE BELOW]
- case '\u1E3D': // ḽ [LATIN SMALL LETTER L WITH CIRCUMFLEX BELOW]
- case '\u24DB': // ⓛ [CIRCLED LATIN SMALL LETTER L]
- case '\u2C61': // ⱡ [LATIN SMALL LETTER L WITH DOUBLE BAR]
- case '\uA747': // ꝇ [LATIN SMALL LETTER BROKEN L]
- case '\uA749': // ꝉ [LATIN SMALL LETTER L WITH HIGH STROKE]
- case '\uA781': // ꞁ [LATIN SMALL LETTER TURNED L]
- case '\uFF4C': // l [FULLWIDTH LATIN SMALL LETTER L]
- output[outputPos++] = 'l';
- break;
- case '\u01C7': // LJ [LATIN CAPITAL LETTER LJ]
- output[outputPos++] = 'L';
- output[outputPos++] = 'J';
- break;
- case '\u1EFA': // Ỻ [LATIN CAPITAL LETTER MIDDLE-WELSH LL]
- output[outputPos++] = 'L';
- output[outputPos++] = 'L';
- break;
- case '\u01C8': // Lj [LATIN CAPITAL LETTER L WITH SMALL LETTER J]
- output[outputPos++] = 'L';
- output[outputPos++] = 'j';
- break;
- case '\u24A7': // ⒧ [PARENTHESIZED LATIN SMALL LETTER L]
- output[outputPos++] = '(';
- output[outputPos++] = 'l';
- output[outputPos++] = ')';
- break;
- case '\u01C9': // lj [LATIN SMALL LETTER LJ]
- output[outputPos++] = 'l';
- output[outputPos++] = 'j';
- break;
- case '\u1EFB': // ỻ [LATIN SMALL LETTER MIDDLE-WELSH LL]
- output[outputPos++] = 'l';
- output[outputPos++] = 'l';
- break;
- case '\u02AA': // ʪ [LATIN SMALL LETTER LS DIGRAPH]
- output[outputPos++] = 'l';
- output[outputPos++] = 's';
- break;
- case '\u02AB': // ʫ [LATIN SMALL LETTER LZ DIGRAPH]
- output[outputPos++] = 'l';
- output[outputPos++] = 'z';
- break;
- case '\u019C': // Ɯ [LATIN CAPITAL LETTER TURNED M]
- case '\u1D0D': // ᴍ [LATIN LETTER SMALL CAPITAL M]
- case '\u1E3E': // Ḿ [LATIN CAPITAL LETTER M WITH ACUTE]
- case '\u1E40': // Ṁ [LATIN CAPITAL LETTER M WITH DOT ABOVE]
- case '\u1E42': // Ṃ [LATIN CAPITAL LETTER M WITH DOT BELOW]
- case '\u24C2': // Ⓜ [CIRCLED LATIN CAPITAL LETTER M]
- case '\u2C6E': // Ɱ [LATIN CAPITAL LETTER M WITH HOOK]
- case '\uA7FD': // ꟽ [LATIN EPIGRAPHIC LETTER INVERTED M]
- case '\uA7FF': // ꟿ [LATIN EPIGRAPHIC LETTER ARCHAIC M]
- case '\uFF2D': // M [FULLWIDTH LATIN CAPITAL LETTER M]
- output[outputPos++] = 'M';
- break;
- case '\u026F': // ɯ [LATIN SMALL LETTER TURNED M]
- case '\u0270': // ɰ [LATIN SMALL LETTER TURNED M WITH LONG LEG]
- case '\u0271': // ɱ [LATIN SMALL LETTER M WITH HOOK]
- case '\u1D6F': // ᵯ [LATIN SMALL LETTER M WITH MIDDLE TILDE]
- case '\u1D86': // ᶆ [LATIN SMALL LETTER M WITH PALATAL HOOK]
- case '\u1E3F': // ḿ [LATIN SMALL LETTER M WITH ACUTE]
- case '\u1E41': // ṁ [LATIN SMALL LETTER M WITH DOT ABOVE]
- case '\u1E43': // ṃ [LATIN SMALL LETTER M WITH DOT BELOW]
- case '\u24DC': // ⓜ [CIRCLED LATIN SMALL LETTER M]
- case '\uFF4D': // m [FULLWIDTH LATIN SMALL LETTER M]
- output[outputPos++] = 'm';
- break;
- case '\u24A8': // ⒨ [PARENTHESIZED LATIN SMALL LETTER M]
- output[outputPos++] = '(';
- output[outputPos++] = 'm';
- output[outputPos++] = ')';
- break;
- case '\u00D1': // Ñ [LATIN CAPITAL LETTER N WITH TILDE]
- case '\u0143': // Ń [LATIN CAPITAL LETTER N WITH ACUTE]
- case '\u0145': // Ņ [LATIN CAPITAL LETTER N WITH CEDILLA]
- case '\u0147': // Ň [LATIN CAPITAL LETTER N WITH CARON]
- case '\u014A': // Ŋ http://en.wikipedia.org/wiki/Eng_(letter) [LATIN CAPITAL LETTER ENG]
- case '\u019D': // Ɲ [LATIN CAPITAL LETTER N WITH LEFT HOOK]
- case '\u01F8': // Ǹ [LATIN CAPITAL LETTER N WITH GRAVE]
- case '\u0220': // Ƞ [LATIN CAPITAL LETTER N WITH LONG RIGHT LEG]
- case '\u0274': // ɴ [LATIN LETTER SMALL CAPITAL N]
- case '\u1D0E': // ᴎ [LATIN LETTER SMALL CAPITAL REVERSED N]
- case '\u1E44': // Ṅ [LATIN CAPITAL LETTER N WITH DOT ABOVE]
- case '\u1E46': // Ṇ [LATIN CAPITAL LETTER N WITH DOT BELOW]
- case '\u1E48': // Ṉ [LATIN CAPITAL LETTER N WITH LINE BELOW]
- case '\u1E4A': // Ṋ [LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW]
- case '\u24C3': // Ⓝ [CIRCLED LATIN CAPITAL LETTER N]
- case '\uFF2E': // N [FULLWIDTH LATIN CAPITAL LETTER N]
- output[outputPos++] = 'N';
- break;
- case '\u00F1': // ñ [LATIN SMALL LETTER N WITH TILDE]
- case '\u0144': // ń [LATIN SMALL LETTER N WITH ACUTE]
- case '\u0146': // ņ [LATIN SMALL LETTER N WITH CEDILLA]
- case '\u0148': // ň [LATIN SMALL LETTER N WITH CARON]
- case '\u0149': // ʼn [LATIN SMALL LETTER N PRECEDED BY APOSTROPHE]
- case '\u014B': // ŋ http://en.wikipedia.org/wiki/Eng_(letter) [LATIN SMALL LETTER ENG]
- case '\u019E': // ƞ [LATIN SMALL LETTER N WITH LONG RIGHT LEG]
- case '\u01F9': // ǹ [LATIN SMALL LETTER N WITH GRAVE]
- case '\u0235': // ȵ [LATIN SMALL LETTER N WITH CURL]
- case '\u0272': // ɲ [LATIN SMALL LETTER N WITH LEFT HOOK]
- case '\u0273': // ɳ [LATIN SMALL LETTER N WITH RETROFLEX HOOK]
- case '\u1D70': // ᵰ [LATIN SMALL LETTER N WITH MIDDLE TILDE]
- case '\u1D87': // ᶇ [LATIN SMALL LETTER N WITH PALATAL HOOK]
- case '\u1E45': // ṅ [LATIN SMALL LETTER N WITH DOT ABOVE]
- case '\u1E47': // ṇ [LATIN SMALL LETTER N WITH DOT BELOW]
- case '\u1E49': // ṉ [LATIN SMALL LETTER N WITH LINE BELOW]
- case '\u1E4B': // ṋ [LATIN SMALL LETTER N WITH CIRCUMFLEX BELOW]
- case '\u207F': // ⁿ [SUPERSCRIPT LATIN SMALL LETTER N]
- case '\u24DD': // ⓝ [CIRCLED LATIN SMALL LETTER N]
- case '\uFF4E': // n [FULLWIDTH LATIN SMALL LETTER N]
- output[outputPos++] = 'n';
- break;
- case '\u01CA': // NJ [LATIN CAPITAL LETTER NJ]
- output[outputPos++] = 'N';
- output[outputPos++] = 'J';
- break;
- case '\u01CB': // Nj [LATIN CAPITAL LETTER N WITH SMALL LETTER J]
- output[outputPos++] = 'N';
- output[outputPos++] = 'j';
- break;
- case '\u24A9': // ⒩ [PARENTHESIZED LATIN SMALL LETTER N]
- output[outputPos++] = '(';
- output[outputPos++] = 'n';
- output[outputPos++] = ')';
- break;
- case '\u01CC': // nj [LATIN SMALL LETTER NJ]
- output[outputPos++] = 'n';
- output[outputPos++] = 'j';
- break;
- case '\u00D2': // Ò [LATIN CAPITAL LETTER O WITH GRAVE]
- case '\u00D3': // Ó [LATIN CAPITAL LETTER O WITH ACUTE]
- case '\u00D4': // Ô [LATIN CAPITAL LETTER O WITH CIRCUMFLEX]
- case '\u00D5': // Õ [LATIN CAPITAL LETTER O WITH TILDE]
- case '\u00D6': // Ö [LATIN CAPITAL LETTER O WITH DIAERESIS]
- case '\u00D8': // Ø [LATIN CAPITAL LETTER O WITH STROKE]
- case '\u014C': // Ō [LATIN CAPITAL LETTER O WITH MACRON]
- case '\u014E': // Ŏ [LATIN CAPITAL LETTER O WITH BREVE]
- case '\u0150': // Ő [LATIN CAPITAL LETTER O WITH DOUBLE ACUTE]
- case '\u0186': // Ɔ [LATIN CAPITAL LETTER OPEN O]
- case '\u019F': // Ɵ [LATIN CAPITAL LETTER O WITH MIDDLE TILDE]
- case '\u01A0': // Ơ [LATIN CAPITAL LETTER O WITH HORN]
- case '\u01D1': // Ǒ [LATIN CAPITAL LETTER O WITH CARON]
- case '\u01EA': // Ǫ [LATIN CAPITAL LETTER O WITH OGONEK]
- case '\u01EC': // Ǭ [LATIN CAPITAL LETTER O WITH OGONEK AND MACRON]
- case '\u01FE': // Ǿ [LATIN CAPITAL LETTER O WITH STROKE AND ACUTE]
- case '\u020C': // Ȍ [LATIN CAPITAL LETTER O WITH DOUBLE GRAVE]
- case '\u020E': // Ȏ [LATIN CAPITAL LETTER O WITH INVERTED BREVE]
- case '\u022A': // Ȫ [LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON]
- case '\u022C': // Ȭ [LATIN CAPITAL LETTER O WITH TILDE AND MACRON]
- case '\u022E': // Ȯ [LATIN CAPITAL LETTER O WITH DOT ABOVE]
- case '\u0230': // Ȱ [LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON]
- case '\u1D0F': // ᴏ [LATIN LETTER SMALL CAPITAL O]
- case '\u1D10': // ᴐ [LATIN LETTER SMALL CAPITAL OPEN O]
- case '\u1E4C': // Ṍ [LATIN CAPITAL LETTER O WITH TILDE AND ACUTE]
- case '\u1E4E': // Ṏ [LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS]
- case '\u1E50': // Ṑ [LATIN CAPITAL LETTER O WITH MACRON AND GRAVE]
- case '\u1E52': // Ṓ [LATIN CAPITAL LETTER O WITH MACRON AND ACUTE]
- case '\u1ECC': // Ọ [LATIN CAPITAL LETTER O WITH DOT BELOW]
- case '\u1ECE': // Ỏ [LATIN CAPITAL LETTER O WITH HOOK ABOVE]
- case '\u1ED0': // Ố [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE]
- case '\u1ED2': // Ồ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE]
- case '\u1ED4': // Ổ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE]
- case '\u1ED6': // Ỗ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE]
- case '\u1ED8': // Ộ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW]
- case '\u1EDA': // Ớ [LATIN CAPITAL LETTER O WITH HORN AND ACUTE]
- case '\u1EDC': // Ờ [LATIN CAPITAL LETTER O WITH HORN AND GRAVE]
- case '\u1EDE': // Ở [LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE]
- case '\u1EE0': // Ỡ [LATIN CAPITAL LETTER O WITH HORN AND TILDE]
- case '\u1EE2': // Ợ [LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW]
- case '\u24C4': // Ⓞ [CIRCLED LATIN CAPITAL LETTER O]
- case '\uA74A': // Ꝋ [LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY]
- case '\uA74C': // Ꝍ [LATIN CAPITAL LETTER O WITH LOOP]
- case '\uFF2F': // O [FULLWIDTH LATIN CAPITAL LETTER O]
- output[outputPos++] = 'O';
- break;
- case '\u00F2': // ò [LATIN SMALL LETTER O WITH GRAVE]
- case '\u00F3': // ó [LATIN SMALL LETTER O WITH ACUTE]
- case '\u00F4': // ô [LATIN SMALL LETTER O WITH CIRCUMFLEX]
- case '\u00F5': // õ [LATIN SMALL LETTER O WITH TILDE]
- case '\u00F6': // ö [LATIN SMALL LETTER O WITH DIAERESIS]
- case '\u00F8': // ø [LATIN SMALL LETTER O WITH STROKE]
- case '\u014D': // ō [LATIN SMALL LETTER O WITH MACRON]
- case '\u014F': // ŏ [LATIN SMALL LETTER O WITH BREVE]
- case '\u0151': // ő [LATIN SMALL LETTER O WITH DOUBLE ACUTE]
- case '\u01A1': // ơ [LATIN SMALL LETTER O WITH HORN]
- case '\u01D2': // ǒ [LATIN SMALL LETTER O WITH CARON]
- case '\u01EB': // ǫ [LATIN SMALL LETTER O WITH OGONEK]
- case '\u01ED': // ǭ [LATIN SMALL LETTER O WITH OGONEK AND MACRON]
- case '\u01FF': // ǿ [LATIN SMALL LETTER O WITH STROKE AND ACUTE]
- case '\u020D': // ȍ [LATIN SMALL LETTER O WITH DOUBLE GRAVE]
- case '\u020F': // ȏ [LATIN SMALL LETTER O WITH INVERTED BREVE]
- case '\u022B': // ȫ [LATIN SMALL LETTER O WITH DIAERESIS AND MACRON]
- case '\u022D': // ȭ [LATIN SMALL LETTER O WITH TILDE AND MACRON]
- case '\u022F': // ȯ [LATIN SMALL LETTER O WITH DOT ABOVE]
- case '\u0231': // ȱ [LATIN SMALL LETTER O WITH DOT ABOVE AND MACRON]
- case '\u0254': // ɔ [LATIN SMALL LETTER OPEN O]
- case '\u0275': // ɵ [LATIN SMALL LETTER BARRED O]
- case '\u1D16': // ᴖ [LATIN SMALL LETTER TOP HALF O]
- case '\u1D17': // ᴗ [LATIN SMALL LETTER BOTTOM HALF O]
- case '\u1D97': // ᶗ [LATIN SMALL LETTER OPEN O WITH RETROFLEX HOOK]
- case '\u1E4D': // ṍ [LATIN SMALL LETTER O WITH TILDE AND ACUTE]
- case '\u1E4F': // ṏ [LATIN SMALL LETTER O WITH TILDE AND DIAERESIS]
- case '\u1E51': // ṑ [LATIN SMALL LETTER O WITH MACRON AND GRAVE]
- case '\u1E53': // ṓ [LATIN SMALL LETTER O WITH MACRON AND ACUTE]
- case '\u1ECD': // ọ [LATIN SMALL LETTER O WITH DOT BELOW]
- case '\u1ECF': // ỏ [LATIN SMALL LETTER O WITH HOOK ABOVE]
- case '\u1ED1': // ố [LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE]
- case '\u1ED3': // ồ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE]
- case '\u1ED5': // ổ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE]
- case '\u1ED7': // ỗ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE]
- case '\u1ED9': // ộ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW]
- case '\u1EDB': // ớ [LATIN SMALL LETTER O WITH HORN AND ACUTE]
- case '\u1EDD': // ờ [LATIN SMALL LETTER O WITH HORN AND GRAVE]
- case '\u1EDF': // ở [LATIN SMALL LETTER O WITH HORN AND HOOK ABOVE]
- case '\u1EE1': // ỡ [LATIN SMALL LETTER O WITH HORN AND TILDE]
- case '\u1EE3': // ợ [LATIN SMALL LETTER O WITH HORN AND DOT BELOW]
- case '\u2092': // ₒ [LATIN SUBSCRIPT SMALL LETTER O]
- case '\u24DE': // ⓞ [CIRCLED LATIN SMALL LETTER O]
- case '\u2C7A': // ⱺ [LATIN SMALL LETTER O WITH LOW RING INSIDE]
- case '\uA74B': // ꝋ [LATIN SMALL LETTER O WITH LONG STROKE OVERLAY]
- case '\uA74D': // ꝍ [LATIN SMALL LETTER O WITH LOOP]
- case '\uFF4F': // o [FULLWIDTH LATIN SMALL LETTER O]
- output[outputPos++] = 'o';
- break;
- case '\u0152': // Œ [LATIN CAPITAL LIGATURE OE]
- case '\u0276': // ɶ [LATIN LETTER SMALL CAPITAL OE]
- output[outputPos++] = 'O';
- output[outputPos++] = 'E';
- break;
- case '\uA74E': // Ꝏ [LATIN CAPITAL LETTER OO]
- output[outputPos++] = 'O';
- output[outputPos++] = 'O';
- break;
- case '\u0222': // Ȣ http://en.wikipedia.org/wiki/OU [LATIN CAPITAL LETTER OU]
- case '\u1D15': // ᴕ [LATIN LETTER SMALL CAPITAL OU]
- output[outputPos++] = 'O';
- output[outputPos++] = 'U';
- break;
- case '\u24AA': // ⒪ [PARENTHESIZED LATIN SMALL LETTER O]
- output[outputPos++] = '(';
- output[outputPos++] = 'o';
- output[outputPos++] = ')';
- break;
- case '\u0153': // œ [LATIN SMALL LIGATURE OE]
- case '\u1D14': // ᴔ [LATIN SMALL LETTER TURNED OE]
- output[outputPos++] = 'o';
- output[outputPos++] = 'e';
- break;
- case '\uA74F': // ꝏ [LATIN SMALL LETTER OO]
- output[outputPos++] = 'o';
- output[outputPos++] = 'o';
- break;
- case '\u0223': // ȣ http://en.wikipedia.org/wiki/OU [LATIN SMALL LETTER OU]
- output[outputPos++] = 'o';
- output[outputPos++] = 'u';
- break;
- case '\u01A4': // Ƥ [LATIN CAPITAL LETTER P WITH HOOK]
- case '\u1D18': // ᴘ [LATIN LETTER SMALL CAPITAL P]
- case '\u1E54': // Ṕ [LATIN CAPITAL LETTER P WITH ACUTE]
- case '\u1E56': // Ṗ [LATIN CAPITAL LETTER P WITH DOT ABOVE]
- case '\u24C5': // Ⓟ [CIRCLED LATIN CAPITAL LETTER P]
- case '\u2C63': // Ᵽ [LATIN CAPITAL LETTER P WITH STROKE]
- case '\uA750': // Ꝑ [LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER]
- case '\uA752': // Ꝓ [LATIN CAPITAL LETTER P WITH FLOURISH]
- case '\uA754': // Ꝕ [LATIN CAPITAL LETTER P WITH SQUIRREL TAIL]
- case '\uFF30': // P [FULLWIDTH LATIN CAPITAL LETTER P]
- output[outputPos++] = 'P';
- break;
- case '\u01A5': // ƥ [LATIN SMALL LETTER P WITH HOOK]
- case '\u1D71': // ᵱ [LATIN SMALL LETTER P WITH MIDDLE TILDE]
- case '\u1D7D': // ᵽ [LATIN SMALL LETTER P WITH STROKE]
- case '\u1D88': // ᶈ [LATIN SMALL LETTER P WITH PALATAL HOOK]
- case '\u1E55': // ṕ [LATIN SMALL LETTER P WITH ACUTE]
- case '\u1E57': // ṗ [LATIN SMALL LETTER P WITH DOT ABOVE]
- case '\u24DF': // ⓟ [CIRCLED LATIN SMALL LETTER P]
- case '\uA751': // ꝑ [LATIN SMALL LETTER P WITH STROKE THROUGH DESCENDER]
- case '\uA753': // ꝓ [LATIN SMALL LETTER P WITH FLOURISH]
- case '\uA755': // ꝕ [LATIN SMALL LETTER P WITH SQUIRREL TAIL]
- case '\uA7FC': // ꟼ [LATIN EPIGRAPHIC LETTER REVERSED P]
- case '\uFF50': // p [FULLWIDTH LATIN SMALL LETTER P]
- output[outputPos++] = 'p';
- break;
- case '\u24AB': // ⒫ [PARENTHESIZED LATIN SMALL LETTER P]
- output[outputPos++] = '(';
- output[outputPos++] = 'p';
- output[outputPos++] = ')';
- break;
- case '\u024A': // Ɋ [LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL]
- case '\u24C6': // Ⓠ [CIRCLED LATIN CAPITAL LETTER Q]
- case '\uA756': // Ꝗ [LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER]
- case '\uA758': // Ꝙ [LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE]
- case '\uFF31': // Q [FULLWIDTH LATIN CAPITAL LETTER Q]
- output[outputPos++] = 'Q';
- break;
- case '\u0138': // ĸ http://en.wikipedia.org/wiki/Kra_(letter) [LATIN SMALL LETTER KRA]
- case '\u024B': // ɋ [LATIN SMALL LETTER Q WITH HOOK TAIL]
- case '\u02A0': // ʠ [LATIN SMALL LETTER Q WITH HOOK]
- case '\u24E0': // ⓠ [CIRCLED LATIN SMALL LETTER Q]
- case '\uA757': // ꝗ [LATIN SMALL LETTER Q WITH STROKE THROUGH DESCENDER]
- case '\uA759': // ꝙ [LATIN SMALL LETTER Q WITH DIAGONAL STROKE]
- case '\uFF51': // q [FULLWIDTH LATIN SMALL LETTER Q]
- output[outputPos++] = 'q';
- break;
- case '\u24AC': // ⒬ [PARENTHESIZED LATIN SMALL LETTER Q]
- output[outputPos++] = '(';
- output[outputPos++] = 'q';
- output[outputPos++] = ')';
- break;
- case '\u0239': // ȹ [LATIN SMALL LETTER QP DIGRAPH]
- output[outputPos++] = 'q';
- output[outputPos++] = 'p';
- break;
- case '\u0154': // Ŕ [LATIN CAPITAL LETTER R WITH ACUTE]
- case '\u0156': // Ŗ [LATIN CAPITAL LETTER R WITH CEDILLA]
- case '\u0158': // Ř [LATIN CAPITAL LETTER R WITH CARON]
- case '\u0210': // Ȓ [LATIN CAPITAL LETTER R WITH DOUBLE GRAVE]
- case '\u0212': // Ȓ [LATIN CAPITAL LETTER R WITH INVERTED BREVE]
- case '\u024C': // Ɍ [LATIN CAPITAL LETTER R WITH STROKE]
- case '\u0280': // ʀ [LATIN LETTER SMALL CAPITAL R]
- case '\u0281': // ʁ [LATIN LETTER SMALL CAPITAL INVERTED R]
- case '\u1D19': // ᴙ [LATIN LETTER SMALL CAPITAL REVERSED R]
- case '\u1D1A': // ᴚ [LATIN LETTER SMALL CAPITAL TURNED R]
- case '\u1E58': // Ṙ [LATIN CAPITAL LETTER R WITH DOT ABOVE]
- case '\u1E5A': // Ṛ [LATIN CAPITAL LETTER R WITH DOT BELOW]
- case '\u1E5C': // Ṝ [LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON]
- case '\u1E5E': // Ṟ [LATIN CAPITAL LETTER R WITH LINE BELOW]
- case '\u24C7': // Ⓡ [CIRCLED LATIN CAPITAL LETTER R]
- case '\u2C64': // Ɽ [LATIN CAPITAL LETTER R WITH TAIL]
- case '\uA75A': // Ꝛ [LATIN CAPITAL LETTER R ROTUNDA]
- case '\uA782': // Ꞃ [LATIN CAPITAL LETTER INSULAR R]
- case '\uFF32': // R [FULLWIDTH LATIN CAPITAL LETTER R]
- output[outputPos++] = 'R';
- break;
- case '\u0155': // ŕ [LATIN SMALL LETTER R WITH ACUTE]
- case '\u0157': // ŗ [LATIN SMALL LETTER R WITH CEDILLA]
- case '\u0159': // ř [LATIN SMALL LETTER R WITH CARON]
- case '\u0211': // ȑ [LATIN SMALL LETTER R WITH DOUBLE GRAVE]
- case '\u0213': // ȓ [LATIN SMALL LETTER R WITH INVERTED BREVE]
- case '\u024D': // ɍ [LATIN SMALL LETTER R WITH STROKE]
- case '\u027C': // ɼ [LATIN SMALL LETTER R WITH LONG LEG]
- case '\u027D': // ɽ [LATIN SMALL LETTER R WITH TAIL]
- case '\u027E': // ɾ [LATIN SMALL LETTER R WITH FISHHOOK]
- case '\u027F': // ɿ [LATIN SMALL LETTER REVERSED R WITH FISHHOOK]
- case '\u1D63': // ᵣ [LATIN SUBSCRIPT SMALL LETTER R]
- case '\u1D72': // ᵲ [LATIN SMALL LETTER R WITH MIDDLE TILDE]
- case '\u1D73': // ᵳ [LATIN SMALL LETTER R WITH FISHHOOK AND MIDDLE TILDE]
- case '\u1D89': // ᶉ [LATIN SMALL LETTER R WITH PALATAL HOOK]
- case '\u1E59': // ṙ [LATIN SMALL LETTER R WITH DOT ABOVE]
- case '\u1E5B': // ṛ [LATIN SMALL LETTER R WITH DOT BELOW]
- case '\u1E5D': // ṝ [LATIN SMALL LETTER R WITH DOT BELOW AND MACRON]
- case '\u1E5F': // ṟ [LATIN SMALL LETTER R WITH LINE BELOW]
- case '\u24E1': // ⓡ [CIRCLED LATIN SMALL LETTER R]
- case '\uA75B': // ꝛ [LATIN SMALL LETTER R ROTUNDA]
- case '\uA783': // ꞃ [LATIN SMALL LETTER INSULAR R]
- case '\uFF52': // r [FULLWIDTH LATIN SMALL LETTER R]
- output[outputPos++] = 'r';
- break;
- case '\u24AD': // ⒭ [PARENTHESIZED LATIN SMALL LETTER R]
- output[outputPos++] = '(';
- output[outputPos++] = 'r';
- output[outputPos++] = ')';
- break;
- case '\u015A': // Ś [LATIN CAPITAL LETTER S WITH ACUTE]
- case '\u015C': // Ŝ [LATIN CAPITAL LETTER S WITH CIRCUMFLEX]
- case '\u015E': // Ş [LATIN CAPITAL LETTER S WITH CEDILLA]
- case '\u0160': // Š [LATIN CAPITAL LETTER S WITH CARON]
- case '\u0218': // Ș [LATIN CAPITAL LETTER S WITH COMMA BELOW]
- case '\u1E60': // Ṡ [LATIN CAPITAL LETTER S WITH DOT ABOVE]
- case '\u1E62': // Ṣ [LATIN CAPITAL LETTER S WITH DOT BELOW]
- case '\u1E64': // Ṥ [LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE]
- case '\u1E66': // Ṧ [LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE]
- case '\u1E68': // Ṩ [LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE]
- case '\u24C8': // Ⓢ [CIRCLED LATIN CAPITAL LETTER S]
- case '\uA731': // ꜱ [LATIN LETTER SMALL CAPITAL S]
- case '\uA785': // ꞅ [LATIN SMALL LETTER INSULAR S]
- case '\uFF33': // S [FULLWIDTH LATIN CAPITAL LETTER S]
- output[outputPos++] = 'S';
- break;
- case '\u015B': // ś [LATIN SMALL LETTER S WITH ACUTE]
- case '\u015D': // ŝ [LATIN SMALL LETTER S WITH CIRCUMFLEX]
- case '\u015F': // ş [LATIN SMALL LETTER S WITH CEDILLA]
- case '\u0161': // š [LATIN SMALL LETTER S WITH CARON]
- case '\u017F': // ſ http://en.wikipedia.org/wiki/Long_S [LATIN SMALL LETTER LONG S]
- case '\u0219': // ș [LATIN SMALL LETTER S WITH COMMA BELOW]
- case '\u023F': // ȿ [LATIN SMALL LETTER S WITH SWASH TAIL]
- case '\u0282': // ʂ [LATIN SMALL LETTER S WITH HOOK]
- case '\u1D74': // ᵴ [LATIN SMALL LETTER S WITH MIDDLE TILDE]
- case '\u1D8A': // ᶊ [LATIN SMALL LETTER S WITH PALATAL HOOK]
- case '\u1E61': // ṡ [LATIN SMALL LETTER S WITH DOT ABOVE]
- case '\u1E63': // ṣ [LATIN SMALL LETTER S WITH DOT BELOW]
- case '\u1E65': // ṥ [LATIN SMALL LETTER S WITH ACUTE AND DOT ABOVE]
- case '\u1E67': // ṧ [LATIN SMALL LETTER S WITH CARON AND DOT ABOVE]
- case '\u1E69': // ṩ [LATIN SMALL LETTER S WITH DOT BELOW AND DOT ABOVE]
- case '\u1E9C': // ẜ [LATIN SMALL LETTER LONG S WITH DIAGONAL STROKE]
- case '\u1E9D': // ẝ [LATIN SMALL LETTER LONG S WITH HIGH STROKE]
- case '\u24E2': // ⓢ [CIRCLED LATIN SMALL LETTER S]
- case '\uA784': // Ꞅ [LATIN CAPITAL LETTER INSULAR S]
- case '\uFF53': // s [FULLWIDTH LATIN SMALL LETTER S]
- output[outputPos++] = 's';
- break;
- case '\u1E9E': // ẞ [LATIN CAPITAL LETTER SHARP S]
- output[outputPos++] = 'S';
- output[outputPos++] = 'S';
- break;
- case '\u24AE': // ⒮ [PARENTHESIZED LATIN SMALL LETTER S]
- output[outputPos++] = '(';
- output[outputPos++] = 's';
- output[outputPos++] = ')';
- break;
- case '\u00DF': // ß [LATIN SMALL LETTER SHARP S]
- output[outputPos++] = 's';
- output[outputPos++] = 's';
- break;
- case '\uFB06': // st [LATIN SMALL LIGATURE ST]
- output[outputPos++] = 's';
- output[outputPos++] = 't';
- break;
- case '\u0162': // Ţ [LATIN CAPITAL LETTER T WITH CEDILLA]
- case '\u0164': // Ť [LATIN CAPITAL LETTER T WITH CARON]
- case '\u0166': // Ŧ [LATIN CAPITAL LETTER T WITH STROKE]
- case '\u01AC': // Ƭ [LATIN CAPITAL LETTER T WITH HOOK]
- case '\u01AE': // Ʈ [LATIN CAPITAL LETTER T WITH RETROFLEX HOOK]
- case '\u021A': // Ț [LATIN CAPITAL LETTER T WITH COMMA BELOW]
- case '\u023E': // Ⱦ [LATIN CAPITAL LETTER T WITH DIAGONAL STROKE]
- case '\u1D1B': // ᴛ [LATIN LETTER SMALL CAPITAL T]
- case '\u1E6A': // Ṫ [LATIN CAPITAL LETTER T WITH DOT ABOVE]
- case '\u1E6C': // Ṭ [LATIN CAPITAL LETTER T WITH DOT BELOW]
- case '\u1E6E': // Ṯ [LATIN CAPITAL LETTER T WITH LINE BELOW]
- case '\u1E70': // Ṱ [LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW]
- case '\u24C9': // Ⓣ [CIRCLED LATIN CAPITAL LETTER T]
- case '\uA786': // Ꞇ [LATIN CAPITAL LETTER INSULAR T]
- case '\uFF34': // T [FULLWIDTH LATIN CAPITAL LETTER T]
- output[outputPos++] = 'T';
- break;
- case '\u0163': // ţ [LATIN SMALL LETTER T WITH CEDILLA]
- case '\u0165': // ť [LATIN SMALL LETTER T WITH CARON]
- case '\u0167': // ŧ [LATIN SMALL LETTER T WITH STROKE]
- case '\u01AB': // ƫ [LATIN SMALL LETTER T WITH PALATAL HOOK]
- case '\u01AD': // ƭ [LATIN SMALL LETTER T WITH HOOK]
- case '\u021B': // ț [LATIN SMALL LETTER T WITH COMMA BELOW]
- case '\u0236': // ȶ [LATIN SMALL LETTER T WITH CURL]
- case '\u0287': // ʇ [LATIN SMALL LETTER TURNED T]
- case '\u0288': // ʈ [LATIN SMALL LETTER T WITH RETROFLEX HOOK]
- case '\u1D75': // ᵵ [LATIN SMALL LETTER T WITH MIDDLE TILDE]
- case '\u1E6B': // ṫ [LATIN SMALL LETTER T WITH DOT ABOVE]
- case '\u1E6D': // ṭ [LATIN SMALL LETTER T WITH DOT BELOW]
- case '\u1E6F': // ṯ [LATIN SMALL LETTER T WITH LINE BELOW]
- case '\u1E71': // ṱ [LATIN SMALL LETTER T WITH CIRCUMFLEX BELOW]
- case '\u1E97': // ẗ [LATIN SMALL LETTER T WITH DIAERESIS]
- case '\u24E3': // ⓣ [CIRCLED LATIN SMALL LETTER T]
- case '\u2C66': // ⱦ [LATIN SMALL LETTER T WITH DIAGONAL STROKE]
- case '\uFF54': // t [FULLWIDTH LATIN SMALL LETTER T]
- output[outputPos++] = 't';
- break;
- case '\u00DE': // Þ [LATIN CAPITAL LETTER THORN]
- case '\uA766': // Ꝧ [LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER]
- output[outputPos++] = 'T';
- output[outputPos++] = 'H';
- break;
- case '\uA728': // Ꜩ [LATIN CAPITAL LETTER TZ]
- output[outputPos++] = 'T';
- output[outputPos++] = 'Z';
- break;
- case '\u24AF': // ⒯ [PARENTHESIZED LATIN SMALL LETTER T]
- output[outputPos++] = '(';
- output[outputPos++] = 't';
- output[outputPos++] = ')';
- break;
- case '\u02A8': // ʨ [LATIN SMALL LETTER TC DIGRAPH WITH CURL]
- output[outputPos++] = 't';
- output[outputPos++] = 'c';
- break;
- case '\u00FE': // þ [LATIN SMALL LETTER THORN]
- case '\u1D7A': // ᵺ [LATIN SMALL LETTER TH WITH STRIKETHROUGH]
- case '\uA767': // ꝧ [LATIN SMALL LETTER THORN WITH STROKE THROUGH DESCENDER]
- output[outputPos++] = 't';
- output[outputPos++] = 'h';
- break;
- case '\u02A6': // ʦ [LATIN SMALL LETTER TS DIGRAPH]
- output[outputPos++] = 't';
- output[outputPos++] = 's';
- break;
- case '\uA729': // ꜩ [LATIN SMALL LETTER TZ]
- output[outputPos++] = 't';
- output[outputPos++] = 'z';
- break;
- case '\u00D9': // Ù [LATIN CAPITAL LETTER U WITH GRAVE]
- case '\u00DA': // Ú [LATIN CAPITAL LETTER U WITH ACUTE]
- case '\u00DB': // Û [LATIN CAPITAL LETTER U WITH CIRCUMFLEX]
- case '\u00DC': // Ü [LATIN CAPITAL LETTER U WITH DIAERESIS]
- case '\u0168': // Ũ [LATIN CAPITAL LETTER U WITH TILDE]
- case '\u016A': // Ū [LATIN CAPITAL LETTER U WITH MACRON]
- case '\u016C': // Ŭ [LATIN CAPITAL LETTER U WITH BREVE]
- case '\u016E': // Ů [LATIN CAPITAL LETTER U WITH RING ABOVE]
- case '\u0170': // Ű [LATIN CAPITAL LETTER U WITH DOUBLE ACUTE]
- case '\u0172': // Ų [LATIN CAPITAL LETTER U WITH OGONEK]
- case '\u01AF': // Ư [LATIN CAPITAL LETTER U WITH HORN]
- case '\u01D3': // Ǔ [LATIN CAPITAL LETTER U WITH CARON]
- case '\u01D5': // Ǖ [LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON]
- case '\u01D7': // Ǘ [LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE]
- case '\u01D9': // Ǚ [LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON]
- case '\u01DB': // Ǜ [LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE]
- case '\u0214': // Ȕ [LATIN CAPITAL LETTER U WITH DOUBLE GRAVE]
- case '\u0216': // Ȗ [LATIN CAPITAL LETTER U WITH INVERTED BREVE]
- case '\u0244': // Ʉ [LATIN CAPITAL LETTER U BAR]
- case '\u1D1C': // ᴜ [LATIN LETTER SMALL CAPITAL U]
- case '\u1D7E': // ᵾ [LATIN SMALL CAPITAL LETTER U WITH STROKE]
- case '\u1E72': // Ṳ [LATIN CAPITAL LETTER U WITH DIAERESIS BELOW]
- case '\u1E74': // Ṵ [LATIN CAPITAL LETTER U WITH TILDE BELOW]
- case '\u1E76': // Ṷ [LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW]
- case '\u1E78': // Ṹ [LATIN CAPITAL LETTER U WITH TILDE AND ACUTE]
- case '\u1E7A': // Ṻ [LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS]
- case '\u1EE4': // Ụ [LATIN CAPITAL LETTER U WITH DOT BELOW]
- case '\u1EE6': // Ủ [LATIN CAPITAL LETTER U WITH HOOK ABOVE]
- case '\u1EE8': // Ứ [LATIN CAPITAL LETTER U WITH HORN AND ACUTE]
- case '\u1EEA': // Ừ [LATIN CAPITAL LETTER U WITH HORN AND GRAVE]
- case '\u1EEC': // Ử [LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE]
- case '\u1EEE': // Ữ [LATIN CAPITAL LETTER U WITH HORN AND TILDE]
- case '\u1EF0': // Ự [LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW]
- case '\u24CA': // Ⓤ [CIRCLED LATIN CAPITAL LETTER U]
- case '\uFF35': // U [FULLWIDTH LATIN CAPITAL LETTER U]
- output[outputPos++] = 'U';
- break;
- case '\u00F9': // ù [LATIN SMALL LETTER U WITH GRAVE]
- case '\u00FA': // ú [LATIN SMALL LETTER U WITH ACUTE]
- case '\u00FB': // û [LATIN SMALL LETTER U WITH CIRCUMFLEX]
- case '\u00FC': // ü [LATIN SMALL LETTER U WITH DIAERESIS]
- case '\u0169': // ũ [LATIN SMALL LETTER U WITH TILDE]
- case '\u016B': // ū [LATIN SMALL LETTER U WITH MACRON]
- case '\u016D': // ŭ [LATIN SMALL LETTER U WITH BREVE]
- case '\u016F': // ů [LATIN SMALL LETTER U WITH RING ABOVE]
- case '\u0171': // ű [LATIN SMALL LETTER U WITH DOUBLE ACUTE]
- case '\u0173': // ų [LATIN SMALL LETTER U WITH OGONEK]
- case '\u01B0': // ư [LATIN SMALL LETTER U WITH HORN]
- case '\u01D4': // ǔ [LATIN SMALL LETTER U WITH CARON]
- case '\u01D6': // ǖ [LATIN SMALL LETTER U WITH DIAERESIS AND MACRON]
- case '\u01D8': // ǘ [LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE]
- case '\u01DA': // ǚ [LATIN SMALL LETTER U WITH DIAERESIS AND CARON]
- case '\u01DC': // ǜ [LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE]
- case '\u0215': // ȕ [LATIN SMALL LETTER U WITH DOUBLE GRAVE]
- case '\u0217': // ȗ [LATIN SMALL LETTER U WITH INVERTED BREVE]
- case '\u0289': // ʉ [LATIN SMALL LETTER U BAR]
- case '\u1D64': // ᵤ [LATIN SUBSCRIPT SMALL LETTER U]
- case '\u1D99': // ᶙ [LATIN SMALL LETTER U WITH RETROFLEX HOOK]
- case '\u1E73': // ṳ [LATIN SMALL LETTER U WITH DIAERESIS BELOW]
- case '\u1E75': // ṵ [LATIN SMALL LETTER U WITH TILDE BELOW]
- case '\u1E77': // ṷ [LATIN SMALL LETTER U WITH CIRCUMFLEX BELOW]
- case '\u1E79': // ṹ [LATIN SMALL LETTER U WITH TILDE AND ACUTE]
- case '\u1E7B': // ṻ [LATIN SMALL LETTER U WITH MACRON AND DIAERESIS]
- case '\u1EE5': // ụ [LATIN SMALL LETTER U WITH DOT BELOW]
- case '\u1EE7': // ủ [LATIN SMALL LETTER U WITH HOOK ABOVE]
- case '\u1EE9': // ứ [LATIN SMALL LETTER U WITH HORN AND ACUTE]
- case '\u1EEB': // ừ [LATIN SMALL LETTER U WITH HORN AND GRAVE]
- case '\u1EED': // ử [LATIN SMALL LETTER U WITH HORN AND HOOK ABOVE]
- case '\u1EEF': // ữ [LATIN SMALL LETTER U WITH HORN AND TILDE]
- case '\u1EF1': // ự [LATIN SMALL LETTER U WITH HORN AND DOT BELOW]
- case '\u24E4': // ⓤ [CIRCLED LATIN SMALL LETTER U]
- case '\uFF55': // u [FULLWIDTH LATIN SMALL LETTER U]
- output[outputPos++] = 'u';
- break;
- case '\u24B0': // ⒰ [PARENTHESIZED LATIN SMALL LETTER U]
- output[outputPos++] = '(';
- output[outputPos++] = 'u';
- output[outputPos++] = ')';
- break;
- case '\u1D6B': // ᵫ [LATIN SMALL LETTER UE]
- output[outputPos++] = 'u';
- output[outputPos++] = 'e';
- break;
- case '\u01B2': // Ʋ [LATIN CAPITAL LETTER V WITH HOOK]
- case '\u0245': // Ʌ [LATIN CAPITAL LETTER TURNED V]
- case '\u1D20': // ᴠ [LATIN LETTER SMALL CAPITAL V]
- case '\u1E7C': // Ṽ [LATIN CAPITAL LETTER V WITH TILDE]
- case '\u1E7E': // Ṿ [LATIN CAPITAL LETTER V WITH DOT BELOW]
- case '\u1EFC': // Ỽ [LATIN CAPITAL LETTER MIDDLE-WELSH V]
- case '\u24CB': // Ⓥ [CIRCLED LATIN CAPITAL LETTER V]
- case '\uA75E': // Ꝟ [LATIN CAPITAL LETTER V WITH DIAGONAL STROKE]
- case '\uA768': // Ꝩ [LATIN CAPITAL LETTER VEND]
- case '\uFF36': // V [FULLWIDTH LATIN CAPITAL LETTER V]
- output[outputPos++] = 'V';
- break;
- case '\u028B': // ʋ [LATIN SMALL LETTER V WITH HOOK]
- case '\u028C': // ʌ [LATIN SMALL LETTER TURNED V]
- case '\u1D65': // ᵥ [LATIN SUBSCRIPT SMALL LETTER V]
- case '\u1D8C': // ᶌ [LATIN SMALL LETTER V WITH PALATAL HOOK]
- case '\u1E7D': // ṽ [LATIN SMALL LETTER V WITH TILDE]
- case '\u1E7F': // ṿ [LATIN SMALL LETTER V WITH DOT BELOW]
- case '\u24E5': // ⓥ [CIRCLED LATIN SMALL LETTER V]
- case '\u2C71': // ⱱ [LATIN SMALL LETTER V WITH RIGHT HOOK]
- case '\u2C74': // ⱴ [LATIN SMALL LETTER V WITH CURL]
- case '\uA75F': // ꝟ [LATIN SMALL LETTER V WITH DIAGONAL STROKE]
- case '\uFF56': // v [FULLWIDTH LATIN SMALL LETTER V]
- output[outputPos++] = 'v';
- break;
- case '\uA760': // Ꝡ [LATIN CAPITAL LETTER VY]
- output[outputPos++] = 'V';
- output[outputPos++] = 'Y';
- break;
- case '\u24B1': // ⒱ [PARENTHESIZED LATIN SMALL LETTER V]
- output[outputPos++] = '(';
- output[outputPos++] = 'v';
- output[outputPos++] = ')';
- break;
- case '\uA761': // ꝡ [LATIN SMALL LETTER VY]
- output[outputPos++] = 'v';
- output[outputPos++] = 'y';
- break;
- case '\u0174': // Ŵ [LATIN CAPITAL LETTER W WITH CIRCUMFLEX]
- case '\u01F7': // Ƿ http://en.wikipedia.org/wiki/Wynn [LATIN CAPITAL LETTER WYNN]
- case '\u1D21': // ᴡ [LATIN LETTER SMALL CAPITAL W]
- case '\u1E80': // Ẁ [LATIN CAPITAL LETTER W WITH GRAVE]
- case '\u1E82': // Ẃ [LATIN CAPITAL LETTER W WITH ACUTE]
- case '\u1E84': // Ẅ [LATIN CAPITAL LETTER W WITH DIAERESIS]
- case '\u1E86': // Ẇ [LATIN CAPITAL LETTER W WITH DOT ABOVE]
- case '\u1E88': // Ẉ [LATIN CAPITAL LETTER W WITH DOT BELOW]
- case '\u24CC': // Ⓦ [CIRCLED LATIN CAPITAL LETTER W]
- case '\u2C72': // Ⱳ [LATIN CAPITAL LETTER W WITH HOOK]
- case '\uFF37': // W [FULLWIDTH LATIN CAPITAL LETTER W]
- output[outputPos++] = 'W';
- break;
- case '\u0175': // ŵ [LATIN SMALL LETTER W WITH CIRCUMFLEX]
- case '\u01BF': // ƿ http://en.wikipedia.org/wiki/Wynn [LATIN LETTER WYNN]
- case '\u028D': // ʍ [LATIN SMALL LETTER TURNED W]
- case '\u1E81': // ẁ [LATIN SMALL LETTER W WITH GRAVE]
- case '\u1E83': // ẃ [LATIN SMALL LETTER W WITH ACUTE]
- case '\u1E85': // ẅ [LATIN SMALL LETTER W WITH DIAERESIS]
- case '\u1E87': // ẇ [LATIN SMALL LETTER W WITH DOT ABOVE]
- case '\u1E89': // ẉ [LATIN SMALL LETTER W WITH DOT BELOW]
- case '\u1E98': // ẘ [LATIN SMALL LETTER W WITH RING ABOVE]
- case '\u24E6': // ⓦ [CIRCLED LATIN SMALL LETTER W]
- case '\u2C73': // ⱳ [LATIN SMALL LETTER W WITH HOOK]
- case '\uFF57': // w [FULLWIDTH LATIN SMALL LETTER W]
- output[outputPos++] = 'w';
- break;
- case '\u24B2': // ⒲ [PARENTHESIZED LATIN SMALL LETTER W]
- output[outputPos++] = '(';
- output[outputPos++] = 'w';
- output[outputPos++] = ')';
- break;
- case '\u1E8A': // Ẋ [LATIN CAPITAL LETTER X WITH DOT ABOVE]
- case '\u1E8C': // Ẍ [LATIN CAPITAL LETTER X WITH DIAERESIS]
- case '\u24CD': // Ⓧ [CIRCLED LATIN CAPITAL LETTER X]
- case '\uFF38': // X [FULLWIDTH LATIN CAPITAL LETTER X]
- output[outputPos++] = 'X';
- break;
- case '\u1D8D': // ᶍ [LATIN SMALL LETTER X WITH PALATAL HOOK]
- case '\u1E8B': // ẋ [LATIN SMALL LETTER X WITH DOT ABOVE]
- case '\u1E8D': // ẍ [LATIN SMALL LETTER X WITH DIAERESIS]
- case '\u2093': // ₓ [LATIN SUBSCRIPT SMALL LETTER X]
- case '\u24E7': // ⓧ [CIRCLED LATIN SMALL LETTER X]
- case '\uFF58': // x [FULLWIDTH LATIN SMALL LETTER X]
- output[outputPos++] = 'x';
- break;
- case '\u24B3': // ⒳ [PARENTHESIZED LATIN SMALL LETTER X]
- output[outputPos++] = '(';
- output[outputPos++] = 'x';
- output[outputPos++] = ')';
- break;
- case '\u00DD': // Ý [LATIN CAPITAL LETTER Y WITH ACUTE]
- case '\u0176': // Ŷ [LATIN CAPITAL LETTER Y WITH CIRCUMFLEX]
- case '\u0178': // Ÿ [LATIN CAPITAL LETTER Y WITH DIAERESIS]
- case '\u01B3': // Ƴ [LATIN CAPITAL LETTER Y WITH HOOK]
- case '\u0232': // Ȳ [LATIN CAPITAL LETTER Y WITH MACRON]
- case '\u024E': // Ɏ [LATIN CAPITAL LETTER Y WITH STROKE]
- case '\u028F': // ʏ [LATIN LETTER SMALL CAPITAL Y]
- case '\u1E8E': // Ẏ [LATIN CAPITAL LETTER Y WITH DOT ABOVE]
- case '\u1EF2': // Ỳ [LATIN CAPITAL LETTER Y WITH GRAVE]
- case '\u1EF4': // Ỵ [LATIN CAPITAL LETTER Y WITH DOT BELOW]
- case '\u1EF6': // Ỷ [LATIN CAPITAL LETTER Y WITH HOOK ABOVE]
- case '\u1EF8': // Ỹ [LATIN CAPITAL LETTER Y WITH TILDE]
- case '\u1EFE': // Ỿ [LATIN CAPITAL LETTER Y WITH LOOP]
- case '\u24CE': // Ⓨ [CIRCLED LATIN CAPITAL LETTER Y]
- case '\uFF39': // Y [FULLWIDTH LATIN CAPITAL LETTER Y]
- output[outputPos++] = 'Y';
- break;
- case '\u00FD': // ý [LATIN SMALL LETTER Y WITH ACUTE]
- case '\u00FF': // ÿ [LATIN SMALL LETTER Y WITH DIAERESIS]
- case '\u0177': // ŷ [LATIN SMALL LETTER Y WITH CIRCUMFLEX]
- case '\u01B4': // ƴ [LATIN SMALL LETTER Y WITH HOOK]
- case '\u0233': // ȳ [LATIN SMALL LETTER Y WITH MACRON]
- case '\u024F': // ɏ [LATIN SMALL LETTER Y WITH STROKE]
- case '\u028E': // ʎ [LATIN SMALL LETTER TURNED Y]
- case '\u1E8F': // ẏ [LATIN SMALL LETTER Y WITH DOT ABOVE]
- case '\u1E99': // ẙ [LATIN SMALL LETTER Y WITH RING ABOVE]
- case '\u1EF3': // ỳ [LATIN SMALL LETTER Y WITH GRAVE]
- case '\u1EF5': // ỵ [LATIN SMALL LETTER Y WITH DOT BELOW]
- case '\u1EF7': // ỷ [LATIN SMALL LETTER Y WITH HOOK ABOVE]
- case '\u1EF9': // ỹ [LATIN SMALL LETTER Y WITH TILDE]
- case '\u1EFF': // ỿ [LATIN SMALL LETTER Y WITH LOOP]
- case '\u24E8': // ⓨ [CIRCLED LATIN SMALL LETTER Y]
- case '\uFF59': // y [FULLWIDTH LATIN SMALL LETTER Y]
- output[outputPos++] = 'y';
- break;
- case '\u24B4': // ⒴ [PARENTHESIZED LATIN SMALL LETTER Y]
- output[outputPos++] = '(';
- output[outputPos++] = 'y';
- output[outputPos++] = ')';
- break;
- case '\u0179': // Ź [LATIN CAPITAL LETTER Z WITH ACUTE]
- case '\u017B': // Ż [LATIN CAPITAL LETTER Z WITH DOT ABOVE]
- case '\u017D': // Ž [LATIN CAPITAL LETTER Z WITH CARON]
- case '\u01B5': // Ƶ [LATIN CAPITAL LETTER Z WITH STROKE]
- case '\u021C': // Ȝ http://en.wikipedia.org/wiki/Yogh [LATIN CAPITAL LETTER YOGH]
- case '\u0224': // Ȥ [LATIN CAPITAL LETTER Z WITH HOOK]
- case '\u1D22': // ᴢ [LATIN LETTER SMALL CAPITAL Z]
- case '\u1E90': // Ẑ [LATIN CAPITAL LETTER Z WITH CIRCUMFLEX]
- case '\u1E92': // Ẓ [LATIN CAPITAL LETTER Z WITH DOT BELOW]
- case '\u1E94': // Ẕ [LATIN CAPITAL LETTER Z WITH LINE BELOW]
- case '\u24CF': // Ⓩ [CIRCLED LATIN CAPITAL LETTER Z]
- case '\u2C6B': // Ⱬ [LATIN CAPITAL LETTER Z WITH DESCENDER]
- case '\uA762': // Ꝣ [LATIN CAPITAL LETTER VISIGOTHIC Z]
- case '\uFF3A': // Z [FULLWIDTH LATIN CAPITAL LETTER Z]
- output[outputPos++] = 'Z';
- break;
- case '\u017A': // ź [LATIN SMALL LETTER Z WITH ACUTE]
- case '\u017C': // ż [LATIN SMALL LETTER Z WITH DOT ABOVE]
- case '\u017E': // ž [LATIN SMALL LETTER Z WITH CARON]
- case '\u01B6': // ƶ [LATIN SMALL LETTER Z WITH STROKE]
- case '\u021D': // ȝ http://en.wikipedia.org/wiki/Yogh [LATIN SMALL LETTER YOGH]
- case '\u0225': // ȥ [LATIN SMALL LETTER Z WITH HOOK]
- case '\u0240': // ɀ [LATIN SMALL LETTER Z WITH SWASH TAIL]
- case '\u0290': // ʐ [LATIN SMALL LETTER Z WITH RETROFLEX HOOK]
- case '\u0291': // ʑ [LATIN SMALL LETTER Z WITH CURL]
- case '\u1D76': // ᵶ [LATIN SMALL LETTER Z WITH MIDDLE TILDE]
- case '\u1D8E': // ᶎ [LATIN SMALL LETTER Z WITH PALATAL HOOK]
- case '\u1E91': // ẑ [LATIN SMALL LETTER Z WITH CIRCUMFLEX]
- case '\u1E93': // ẓ [LATIN SMALL LETTER Z WITH DOT BELOW]
- case '\u1E95': // ẕ [LATIN SMALL LETTER Z WITH LINE BELOW]
- case '\u24E9': // ⓩ [CIRCLED LATIN SMALL LETTER Z]
- case '\u2C6C': // ⱬ [LATIN SMALL LETTER Z WITH DESCENDER]
- case '\uA763': // ꝣ [LATIN SMALL LETTER VISIGOTHIC Z]
- case '\uFF5A': // z [FULLWIDTH LATIN SMALL LETTER Z]
- output[outputPos++] = 'z';
- break;
- case '\u24B5': // ⒵ [PARENTHESIZED LATIN SMALL LETTER Z]
- output[outputPos++] = '(';
- output[outputPos++] = 'z';
- output[outputPos++] = ')';
- break;
- case '\u2070': // ⁰ [SUPERSCRIPT ZERO]
- case '\u2080': // ₀ [SUBSCRIPT ZERO]
- case '\u24EA': // ⓪ [CIRCLED DIGIT ZERO]
- case '\u24FF': // ⓿ [NEGATIVE CIRCLED DIGIT ZERO]
- case '\uFF10': // 0 [FULLWIDTH DIGIT ZERO]
- output[outputPos++] = '0';
- break;
- case '\u00B9': // ¹ [SUPERSCRIPT ONE]
- case '\u2081': // ₁ [SUBSCRIPT ONE]
- case '\u2460': // ① [CIRCLED DIGIT ONE]
- case '\u24F5': // ⓵ [DOUBLE CIRCLED DIGIT ONE]
- case '\u2776': // ❶ [DINGBAT NEGATIVE CIRCLED DIGIT ONE]
- case '\u2780': // ➀ [DINGBAT CIRCLED SANS-SERIF DIGIT ONE]
- case '\u278A': // ➊ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ONE]
- case '\uFF11': // 1 [FULLWIDTH DIGIT ONE]
- output[outputPos++] = '1';
- break;
- case '\u2488': // ⒈ [DIGIT ONE FULL STOP]
- output[outputPos++] = '1';
- output[outputPos++] = '.';
- break;
- case '\u2474': // ⑴ [PARENTHESIZED DIGIT ONE]
- output[outputPos++] = '(';
- output[outputPos++] = '1';
- output[outputPos++] = ')';
- break;
- case '\u00B2': // ² [SUPERSCRIPT TWO]
- case '\u2082': // ₂ [SUBSCRIPT TWO]
- case '\u2461': // ② [CIRCLED DIGIT TWO]
- case '\u24F6': // ⓶ [DOUBLE CIRCLED DIGIT TWO]
- case '\u2777': // ❷ [DINGBAT NEGATIVE CIRCLED DIGIT TWO]
- case '\u2781': // ➁ [DINGBAT CIRCLED SANS-SERIF DIGIT TWO]
- case '\u278B': // ➋ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT TWO]
- case '\uFF12': // 2 [FULLWIDTH DIGIT TWO]
- output[outputPos++] = '2';
- break;
- case '\u2489': // ⒉ [DIGIT TWO FULL STOP]
- output[outputPos++] = '2';
- output[outputPos++] = '.';
- break;
- case '\u2475': // ⑵ [PARENTHESIZED DIGIT TWO]
- output[outputPos++] = '(';
- output[outputPos++] = '2';
- output[outputPos++] = ')';
- break;
- case '\u00B3': // ³ [SUPERSCRIPT THREE]
- case '\u2083': // ₃ [SUBSCRIPT THREE]
- case '\u2462': // ③ [CIRCLED DIGIT THREE]
- case '\u24F7': // ⓷ [DOUBLE CIRCLED DIGIT THREE]
- case '\u2778': // ❸ [DINGBAT NEGATIVE CIRCLED DIGIT THREE]
- case '\u2782': // ➂ [DINGBAT CIRCLED SANS-SERIF DIGIT THREE]
- case '\u278C': // ➌ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT THREE]
- case '\uFF13': // 3 [FULLWIDTH DIGIT THREE]
- output[outputPos++] = '3';
- break;
- case '\u248A': // ⒊ [DIGIT THREE FULL STOP]
- output[outputPos++] = '3';
- output[outputPos++] = '.';
- break;
- case '\u2476': // ⑶ [PARENTHESIZED DIGIT THREE]
- output[outputPos++] = '(';
- output[outputPos++] = '3';
- output[outputPos++] = ')';
- break;
- case '\u2074': // ⁴ [SUPERSCRIPT FOUR]
- case '\u2084': // ₄ [SUBSCRIPT FOUR]
- case '\u2463': // ④ [CIRCLED DIGIT FOUR]
- case '\u24F8': // ⓸ [DOUBLE CIRCLED DIGIT FOUR]
- case '\u2779': // ❹ [DINGBAT NEGATIVE CIRCLED DIGIT FOUR]
- case '\u2783': // ➃ [DINGBAT CIRCLED SANS-SERIF DIGIT FOUR]
- case '\u278D': // ➍ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FOUR]
- case '\uFF14': // 4 [FULLWIDTH DIGIT FOUR]
- output[outputPos++] = '4';
- break;
- case '\u248B': // ⒋ [DIGIT FOUR FULL STOP]
- output[outputPos++] = '4';
- output[outputPos++] = '.';
- break;
- case '\u2477': // ⑷ [PARENTHESIZED DIGIT FOUR]
- output[outputPos++] = '(';
- output[outputPos++] = '4';
- output[outputPos++] = ')';
- break;
- case '\u2075': // ⁵ [SUPERSCRIPT FIVE]
- case '\u2085': // ₅ [SUBSCRIPT FIVE]
- case '\u2464': // ⑤ [CIRCLED DIGIT FIVE]
- case '\u24F9': // ⓹ [DOUBLE CIRCLED DIGIT FIVE]
- case '\u277A': // ❺ [DINGBAT NEGATIVE CIRCLED DIGIT FIVE]
- case '\u2784': // ➄ [DINGBAT CIRCLED SANS-SERIF DIGIT FIVE]
- case '\u278E': // ➎ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FIVE]
- case '\uFF15': // 5 [FULLWIDTH DIGIT FIVE]
- output[outputPos++] = '5';
- break;
- case '\u248C': // ⒌ [DIGIT FIVE FULL STOP]
- output[outputPos++] = '5';
- output[outputPos++] = '.';
- break;
- case '\u2478': // ⑸ [PARENTHESIZED DIGIT FIVE]
- output[outputPos++] = '(';
- output[outputPos++] = '5';
- output[outputPos++] = ')';
- break;
- case '\u2076': // ⁶ [SUPERSCRIPT SIX]
- case '\u2086': // ₆ [SUBSCRIPT SIX]
- case '\u2465': // ⑥ [CIRCLED DIGIT SIX]
- case '\u24FA': // ⓺ [DOUBLE CIRCLED DIGIT SIX]
- case '\u277B': // ❻ [DINGBAT NEGATIVE CIRCLED DIGIT SIX]
- case '\u2785': // ➅ [DINGBAT CIRCLED SANS-SERIF DIGIT SIX]
- case '\u278F': // ➏ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SIX]
- case '\uFF16': // 6 [FULLWIDTH DIGIT SIX]
- output[outputPos++] = '6';
- break;
- case '\u248D': // ⒍ [DIGIT SIX FULL STOP]
- output[outputPos++] = '6';
- output[outputPos++] = '.';
- break;
- case '\u2479': // ⑹ [PARENTHESIZED DIGIT SIX]
- output[outputPos++] = '(';
- output[outputPos++] = '6';
- output[outputPos++] = ')';
- break;
- case '\u2077': // ⁷ [SUPERSCRIPT SEVEN]
- case '\u2087': // ₇ [SUBSCRIPT SEVEN]
- case '\u2466': // ⑦ [CIRCLED DIGIT SEVEN]
- case '\u24FB': // ⓻ [DOUBLE CIRCLED DIGIT SEVEN]
- case '\u277C': // ❼ [DINGBAT NEGATIVE CIRCLED DIGIT SEVEN]
- case '\u2786': // ➆ [DINGBAT CIRCLED SANS-SERIF DIGIT SEVEN]
- case '\u2790': // ➐ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SEVEN]
- case '\uFF17': // 7 [FULLWIDTH DIGIT SEVEN]
- output[outputPos++] = '7';
- break;
- case '\u248E': // ⒎ [DIGIT SEVEN FULL STOP]
- output[outputPos++] = '7';
- output[outputPos++] = '.';
- break;
- case '\u247A': // ⑺ [PARENTHESIZED DIGIT SEVEN]
- output[outputPos++] = '(';
- output[outputPos++] = '7';
- output[outputPos++] = ')';
- break;
- case '\u2078': // ⁸ [SUPERSCRIPT EIGHT]
- case '\u2088': // ₈ [SUBSCRIPT EIGHT]
- case '\u2467': // ⑧ [CIRCLED DIGIT EIGHT]
- case '\u24FC': // ⓼ [DOUBLE CIRCLED DIGIT EIGHT]
- case '\u277D': // ❽ [DINGBAT NEGATIVE CIRCLED DIGIT EIGHT]
- case '\u2787': // ➇ [DINGBAT CIRCLED SANS-SERIF DIGIT EIGHT]
- case '\u2791': // ➑ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT EIGHT]
- case '\uFF18': // 8 [FULLWIDTH DIGIT EIGHT]
- output[outputPos++] = '8';
- break;
- case '\u248F': // ⒏ [DIGIT EIGHT FULL STOP]
- output[outputPos++] = '8';
- output[outputPos++] = '.';
- break;
- case '\u247B': // ⑻ [PARENTHESIZED DIGIT EIGHT]
- output[outputPos++] = '(';
- output[outputPos++] = '8';
- output[outputPos++] = ')';
- break;
- case '\u2079': // ⁹ [SUPERSCRIPT NINE]
- case '\u2089': // ₉ [SUBSCRIPT NINE]
- case '\u2468': // ⑨ [CIRCLED DIGIT NINE]
- case '\u24FD': // ⓽ [DOUBLE CIRCLED DIGIT NINE]
- case '\u277E': // ❾ [DINGBAT NEGATIVE CIRCLED DIGIT NINE]
- case '\u2788': // ➈ [DINGBAT CIRCLED SANS-SERIF DIGIT NINE]
- case '\u2792': // ➒ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT NINE]
- case '\uFF19': // 9 [FULLWIDTH DIGIT NINE]
- output[outputPos++] = '9';
- break;
- case '\u2490': // ⒐ [DIGIT NINE FULL STOP]
- output[outputPos++] = '9';
- output[outputPos++] = '.';
- break;
- case '\u247C': // ⑼ [PARENTHESIZED DIGIT NINE]
- output[outputPos++] = '(';
- output[outputPos++] = '9';
- output[outputPos++] = ')';
- break;
- case '\u2469': // ⑩ [CIRCLED NUMBER TEN]
- case '\u24FE': // ⓾ [DOUBLE CIRCLED NUMBER TEN]
- case '\u277F': // ❿ [DINGBAT NEGATIVE CIRCLED NUMBER TEN]
- case '\u2789': // ➉ [DINGBAT CIRCLED SANS-SERIF NUMBER TEN]
- case '\u2793': // ➓ [DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN]
- output[outputPos++] = '1';
- output[outputPos++] = '0';
- break;
- case '\u2491': // ⒑ [NUMBER TEN FULL STOP]
- output[outputPos++] = '1';
- output[outputPos++] = '0';
- output[outputPos++] = '.';
- break;
- case '\u247D': // ⑽ [PARENTHESIZED NUMBER TEN]
- output[outputPos++] = '(';
- output[outputPos++] = '1';
- output[outputPos++] = '0';
- output[outputPos++] = ')';
- break;
- case '\u246A': // ⑪ [CIRCLED NUMBER ELEVEN]
- case '\u24EB': // ⓫ [NEGATIVE CIRCLED NUMBER ELEVEN]
- output[outputPos++] = '1';
- output[outputPos++] = '1';
- break;
- case '\u2492': // ⒒ [NUMBER ELEVEN FULL STOP]
- output[outputPos++] = '1';
- output[outputPos++] = '1';
- output[outputPos++] = '.';
- break;
- case '\u247E': // ⑾ [PARENTHESIZED NUMBER ELEVEN]
- output[outputPos++] = '(';
- output[outputPos++] = '1';
- output[outputPos++] = '1';
- output[outputPos++] = ')';
- break;
- case '\u246B': // ⑫ [CIRCLED NUMBER TWELVE]
- case '\u24EC': // ⓬ [NEGATIVE CIRCLED NUMBER TWELVE]
- output[outputPos++] = '1';
- output[outputPos++] = '2';
- break;
- case '\u2493': // ⒓ [NUMBER TWELVE FULL STOP]
- output[outputPos++] = '1';
- output[outputPos++] = '2';
- output[outputPos++] = '.';
- break;
- case '\u247F': // ⑿ [PARENTHESIZED NUMBER TWELVE]
- output[outputPos++] = '(';
- output[outputPos++] = '1';
- output[outputPos++] = '2';
- output[outputPos++] = ')';
- break;
- case '\u246C': // ⑬ [CIRCLED NUMBER THIRTEEN]
- case '\u24ED': // ⓭ [NEGATIVE CIRCLED NUMBER THIRTEEN]
- output[outputPos++] = '1';
- output[outputPos++] = '3';
- break;
- case '\u2494': // ⒔ [NUMBER THIRTEEN FULL STOP]
- output[outputPos++] = '1';
- output[outputPos++] = '3';
- output[outputPos++] = '.';
- break;
- case '\u2480': // ⒀ [PARENTHESIZED NUMBER THIRTEEN]
- output[outputPos++] = '(';
- output[outputPos++] = '1';
- output[outputPos++] = '3';
- output[outputPos++] = ')';
- break;
- case '\u246D': // ⑭ [CIRCLED NUMBER FOURTEEN]
- case '\u24EE': // ⓮ [NEGATIVE CIRCLED NUMBER FOURTEEN]
- output[outputPos++] = '1';
- output[outputPos++] = '4';
- break;
- case '\u2495': // ⒕ [NUMBER FOURTEEN FULL STOP]
- output[outputPos++] = '1';
- output[outputPos++] = '4';
- output[outputPos++] = '.';
- break;
- case '\u2481': // ⒁ [PARENTHESIZED NUMBER FOURTEEN]
- output[outputPos++] = '(';
- output[outputPos++] = '1';
- output[outputPos++] = '4';
- output[outputPos++] = ')';
- break;
- case '\u246E': // ⑮ [CIRCLED NUMBER FIFTEEN]
- case '\u24EF': // ⓯ [NEGATIVE CIRCLED NUMBER FIFTEEN]
- output[outputPos++] = '1';
- output[outputPos++] = '5';
- break;
- case '\u2496': // ⒖ [NUMBER FIFTEEN FULL STOP]
- output[outputPos++] = '1';
- output[outputPos++] = '5';
- output[outputPos++] = '.';
- break;
- case '\u2482': // ⒂ [PARENTHESIZED NUMBER FIFTEEN]
- output[outputPos++] = '(';
- output[outputPos++] = '1';
- output[outputPos++] = '5';
- output[outputPos++] = ')';
- break;
- case '\u246F': // ⑯ [CIRCLED NUMBER SIXTEEN]
- case '\u24F0': // ⓰ [NEGATIVE CIRCLED NUMBER SIXTEEN]
- output[outputPos++] = '1';
- output[outputPos++] = '6';
- break;
- case '\u2497': // ⒗ [NUMBER SIXTEEN FULL STOP]
- output[outputPos++] = '1';
- output[outputPos++] = '6';
- output[outputPos++] = '.';
- break;
- case '\u2483': // ⒃ [PARENTHESIZED NUMBER SIXTEEN]
- output[outputPos++] = '(';
- output[outputPos++] = '1';
- output[outputPos++] = '6';
- output[outputPos++] = ')';
- break;
- case '\u2470': // ⑰ [CIRCLED NUMBER SEVENTEEN]
- case '\u24F1': // ⓱ [NEGATIVE CIRCLED NUMBER SEVENTEEN]
- output[outputPos++] = '1';
- output[outputPos++] = '7';
- break;
- case '\u2498': // ⒘ [NUMBER SEVENTEEN FULL STOP]
- output[outputPos++] = '1';
- output[outputPos++] = '7';
- output[outputPos++] = '.';
- break;
- case '\u2484': // ⒄ [PARENTHESIZED NUMBER SEVENTEEN]
- output[outputPos++] = '(';
- output[outputPos++] = '1';
- output[outputPos++] = '7';
- output[outputPos++] = ')';
- break;
- case '\u2471': // ⑱ [CIRCLED NUMBER EIGHTEEN]
- case '\u24F2': // ⓲ [NEGATIVE CIRCLED NUMBER EIGHTEEN]
- output[outputPos++] = '1';
- output[outputPos++] = '8';
- break;
- case '\u2499': // ⒙ [NUMBER EIGHTEEN FULL STOP]
- output[outputPos++] = '1';
- output[outputPos++] = '8';
- output[outputPos++] = '.';
- break;
- case '\u2485': // ⒅ [PARENTHESIZED NUMBER EIGHTEEN]
- output[outputPos++] = '(';
- output[outputPos++] = '1';
- output[outputPos++] = '8';
- output[outputPos++] = ')';
- break;
- case '\u2472': // ⑲ [CIRCLED NUMBER NINETEEN]
- case '\u24F3': // ⓳ [NEGATIVE CIRCLED NUMBER NINETEEN]
- output[outputPos++] = '1';
- output[outputPos++] = '9';
- break;
- case '\u249A': // ⒚ [NUMBER NINETEEN FULL STOP]
- output[outputPos++] = '1';
- output[outputPos++] = '9';
- output[outputPos++] = '.';
- break;
- case '\u2486': // ⒆ [PARENTHESIZED NUMBER NINETEEN]
- output[outputPos++] = '(';
- output[outputPos++] = '1';
- output[outputPos++] = '9';
- output[outputPos++] = ')';
- break;
- case '\u2473': // ⑳ [CIRCLED NUMBER TWENTY]
- case '\u24F4': // ⓴ [NEGATIVE CIRCLED NUMBER TWENTY]
- output[outputPos++] = '2';
- output[outputPos++] = '0';
- break;
- case '\u249B': // ⒛ [NUMBER TWENTY FULL STOP]
- output[outputPos++] = '2';
- output[outputPos++] = '0';
- output[outputPos++] = '.';
- break;
- case '\u2487': // ⒇ [PARENTHESIZED NUMBER TWENTY]
- output[outputPos++] = '(';
- output[outputPos++] = '2';
- output[outputPos++] = '0';
- output[outputPos++] = ')';
- break;
- case '\u00AB': // « [LEFT-POINTING DOUBLE ANGLE QUOTATION MARK]
- case '\u00BB': // » [RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK]
- case '\u201C': // “ [LEFT DOUBLE QUOTATION MARK]
- case '\u201D': // ” [RIGHT DOUBLE QUOTATION MARK]
- case '\u201E': // „ [DOUBLE LOW-9 QUOTATION MARK]
- case '\u2033': // ″ [DOUBLE PRIME]
- case '\u2036': // ‶ [REVERSED DOUBLE PRIME]
- case '\u275D': // ❝ [HEAVY DOUBLE TURNED COMMA QUOTATION MARK ORNAMENT]
- case '\u275E': // ❞ [HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT]
- case '\u276E': // ❮ [HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT]
- case '\u276F': // ❯ [HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT]
- case '\uFF02': // " [FULLWIDTH QUOTATION MARK]
- output[outputPos++] = '"';
- break;
- case '\u2018': // ‘ [LEFT SINGLE QUOTATION MARK]
- case '\u2019': // ’ [RIGHT SINGLE QUOTATION MARK]
- case '\u201A': // ‚ [SINGLE LOW-9 QUOTATION MARK]
- case '\u201B': // ‛ [SINGLE HIGH-REVERSED-9 QUOTATION MARK]
- case '\u2032': // ′ [PRIME]
- case '\u2035': // ‵ [REVERSED PRIME]
- case '\u2039': // ‹ [SINGLE LEFT-POINTING ANGLE QUOTATION MARK]
- case '\u203A': // › [SINGLE RIGHT-POINTING ANGLE QUOTATION MARK]
- case '\u275B': // ❛ [HEAVY SINGLE TURNED COMMA QUOTATION MARK ORNAMENT]
- case '\u275C': // ❜ [HEAVY SINGLE COMMA QUOTATION MARK ORNAMENT]
- case '\uFF07': // ' [FULLWIDTH APOSTROPHE]
- output[outputPos++] = '\'';
- break;
- case '\u2010': // ‐ [HYPHEN]
- case '\u2011': // ‑ [NON-BREAKING HYPHEN]
- case '\u2012': // ‒ [FIGURE DASH]
- case '\u2013': // – [EN DASH]
- case '\u2014': // — [EM DASH]
- case '\u207B': // ⁻ [SUPERSCRIPT MINUS]
- case '\u208B': // ₋ [SUBSCRIPT MINUS]
- case '\uFF0D': // - [FULLWIDTH HYPHEN-MINUS]
- output[outputPos++] = '-';
- break;
- case '\u2045': // ⁅ [LEFT SQUARE BRACKET WITH QUILL]
- case '\u2772': // ❲ [LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT]
- case '\uFF3B': // [ [FULLWIDTH LEFT SQUARE BRACKET]
- output[outputPos++] = '[';
- break;
- case '\u2046': // ⁆ [RIGHT SQUARE BRACKET WITH QUILL]
- case '\u2773': // ❳ [LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT]
- case '\uFF3D': // ] [FULLWIDTH RIGHT SQUARE BRACKET]
- output[outputPos++] = ']';
- break;
- case '\u207D': // ⁽ [SUPERSCRIPT LEFT PARENTHESIS]
- case '\u208D': // ₍ [SUBSCRIPT LEFT PARENTHESIS]
- case '\u2768': // ❨ [MEDIUM LEFT PARENTHESIS ORNAMENT]
- case '\u276A': // ❪ [MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT]
- case '\uFF08': // ( [FULLWIDTH LEFT PARENTHESIS]
- output[outputPos++] = '(';
- break;
- case '\u2E28': // ⸨ [LEFT DOUBLE PARENTHESIS]
- output[outputPos++] = '(';
- output[outputPos++] = '(';
- break;
- case '\u207E': // ⁾ [SUPERSCRIPT RIGHT PARENTHESIS]
- case '\u208E': // ₎ [SUBSCRIPT RIGHT PARENTHESIS]
- case '\u2769': // ❩ [MEDIUM RIGHT PARENTHESIS ORNAMENT]
- case '\u276B': // ❫ [MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT]
- case '\uFF09': // ) [FULLWIDTH RIGHT PARENTHESIS]
- output[outputPos++] = ')';
- break;
- case '\u2E29': // ⸩ [RIGHT DOUBLE PARENTHESIS]
- output[outputPos++] = ')';
- output[outputPos++] = ')';
- break;
- case '\u276C': // ❬ [MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT]
- case '\u2770': // ❰ [HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT]
- case '\uFF1C': // < [FULLWIDTH LESS-THAN SIGN]
- output[outputPos++] = '<';
- break;
- case '\u276D': // ❭ [MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT]
- case '\u2771': // ❱ [HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT]
- case '\uFF1E': // > [FULLWIDTH GREATER-THAN SIGN]
- output[outputPos++] = '>';
- break;
- case '\u2774': // ❴ [MEDIUM LEFT CURLY BRACKET ORNAMENT]
- case '\uFF5B': // { [FULLWIDTH LEFT CURLY BRACKET]
- output[outputPos++] = '{';
- break;
- case '\u2775': // ❵ [MEDIUM RIGHT CURLY BRACKET ORNAMENT]
- case '\uFF5D': // } [FULLWIDTH RIGHT CURLY BRACKET]
- output[outputPos++] = '}';
- break;
- case '\u207A': // ⁺ [SUPERSCRIPT PLUS SIGN]
- case '\u208A': // ₊ [SUBSCRIPT PLUS SIGN]
- case '\uFF0B': // + [FULLWIDTH PLUS SIGN]
- output[outputPos++] = '+';
- break;
- case '\u207C': // ⁼ [SUPERSCRIPT EQUALS SIGN]
- case '\u208C': // ₌ [SUBSCRIPT EQUALS SIGN]
- case '\uFF1D': // = [FULLWIDTH EQUALS SIGN]
- output[outputPos++] = '=';
- break;
- case '\uFF01': // ! [FULLWIDTH EXCLAMATION MARK]
- output[outputPos++] = '!';
- break;
- case '\u203C': // ‼ [DOUBLE EXCLAMATION MARK]
- output[outputPos++] = '!';
- output[outputPos++] = '!';
- break;
- case '\u2049': // ⁉ [EXCLAMATION QUESTION MARK]
- output[outputPos++] = '!';
- output[outputPos++] = '?';
- break;
- case '\uFF03': // # [FULLWIDTH NUMBER SIGN]
- output[outputPos++] = '#';
- break;
- case '\uFF04': // $ [FULLWIDTH DOLLAR SIGN]
- output[outputPos++] = '$';
- break;
- case '\u2052': // ⁒ [COMMERCIAL MINUS SIGN]
- case '\uFF05': // % [FULLWIDTH PERCENT SIGN]
- output[outputPos++] = '%';
- break;
- case '\uFF06': // & [FULLWIDTH AMPERSAND]
- output[outputPos++] = '&';
- break;
- case '\u204E': // ⁎ [LOW ASTERISK]
- case '\uFF0A': // * [FULLWIDTH ASTERISK]
- output[outputPos++] = '*';
- break;
- case '\uFF0C': // , [FULLWIDTH COMMA]
- output[outputPos++] = ',';
- break;
- case '\uFF0E': // . [FULLWIDTH FULL STOP]
- output[outputPos++] = '.';
- break;
- case '\u2044': // ⁄ [FRACTION SLASH]
- case '\uFF0F': // / [FULLWIDTH SOLIDUS]
- output[outputPos++] = '/';
- break;
- case '\uFF1A': // : [FULLWIDTH COLON]
- output[outputPos++] = ':';
- break;
- case '\u204F': // ⁏ [REVERSED SEMICOLON]
- case '\uFF1B': // ; [FULLWIDTH SEMICOLON]
- output[outputPos++] = ';';
- break;
- case '\uFF1F': // ? [FULLWIDTH QUESTION MARK]
- output[outputPos++] = '?';
- break;
- case '\u2047': // ⁇ [DOUBLE QUESTION MARK]
- output[outputPos++] = '?';
- output[outputPos++] = '?';
- break;
- case '\u2048': // ⁈ [QUESTION EXCLAMATION MARK]
- output[outputPos++] = '?';
- output[outputPos++] = '!';
- break;
- case '\uFF20': // @ [FULLWIDTH COMMERCIAL AT]
- output[outputPos++] = '@';
- break;
- case '\uFF3C': // \ [FULLWIDTH REVERSE SOLIDUS]
- output[outputPos++] = '\\';
- break;
- case '\u2038': // ‸ [CARET]
- case '\uFF3E': // ^ [FULLWIDTH CIRCUMFLEX ACCENT]
- output[outputPos++] = '^';
- break;
- case '\uFF3F': // _ [FULLWIDTH LOW LINE]
- output[outputPos++] = '_';
- break;
- case '\u2053': // ⁓ [SWUNG DASH]
- case '\uFF5E': // ~ [FULLWIDTH TILDE]
- output[outputPos++] = '~';
- break;
- default:
- output[outputPos++] = c;
- break;
- }
- }
- }
- return outputPos;
- }
-}
diff --git a/src/main/java/org/apache/lucene/queryparser/XSimpleQueryParser.java b/src/main/java/org/apache/lucene/queryparser/XSimpleQueryParser.java
deleted file mode 100644
index 1dba15560881d..0000000000000
--- a/src/main/java/org/apache/lucene/queryparser/XSimpleQueryParser.java
+++ /dev/null
@@ -1,627 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.lucene.queryparser;
-
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.search.BooleanClause;
-import org.apache.lucene.search.BooleanQuery;
-import org.apache.lucene.search.FuzzyQuery;
-import org.apache.lucene.search.MatchAllDocsQuery;
-import org.apache.lucene.search.PrefixQuery;
-import org.apache.lucene.search.Query;
-import org.apache.lucene.util.QueryBuilder;
-import org.apache.lucene.util.Version;
-import org.apache.lucene.util.automaton.LevenshteinAutomata;
-import org.elasticsearch.common.lucene.Lucene;
-
-import java.util.Collections;
-import java.util.Map;
-
-/**
- * SimpleQueryParser is used to parse human readable query syntax.
- *
- * The main idea behind this parser is that a person should be able to type
- * whatever they want to represent a query, and this parser will do its best
- * to interpret what to search for no matter how poorly composed the request
- * may be. Tokens are considered to be any of a term, phrase, or subquery for the
- * operations described below. Whitespace including ' ' '\n' '\r' and '\t'
- * and certain operators may be used to delimit tokens ( ) + | " .
- *
- * Any errors in query syntax will be ignored and the parser will attempt
- * to decipher what it can; however, this may mean odd or unexpected results.
- * Query Operators
- *
- * - '{@code +}' specifies {@code AND} operation: token1+token2
- * - '{@code |}' specifies {@code OR} operation: token1|token2
- * - '{@code -}' negates a single token: -token0
- * - '{@code "}' creates phrases of terms: "term1 term2 ..."
- * - '{@code *}' at the end of terms specifies prefix query: term*
- * - '{@code ~}N' at the end of terms specifies fuzzy query: term~1
- * - '{@code ~}N' at the end of phrases specifies near query: "term1 term2"~5
- * - '{@code (}' and '{@code )}' specifies precedence: token1 + (token2 | token3)
- *
- *
- * The {@link #setDefaultOperator default operator} is {@code OR} if no other operator is specified.
- * For example, the following will {@code OR} {@code token1} and {@code token2} together:
- * token1 token2
- *
- * Normal operator precedence will be simple order from right to left.
- * For example, the following will evaluate {@code token1 OR token2} first,
- * then {@code AND} with {@code token3}:
- * token1 | token2 + token3
- * Escaping
- *
- * An individual term may contain any possible character with certain characters
- * requiring escaping using a '{@code \}'. The following characters will need to be escaped in
- * terms and phrases:
- * {@code + | " ( ) ' \}
- *
- * The '{@code -}' operator is a special case. On individual terms (not phrases) the first
- * character of a term that is {@code -} must be escaped; however, any '{@code -}' characters
- * beyond the first character do not need to be escaped.
- * For example:
- *
- * - {@code -term1} -- Specifies {@code NOT} operation against {@code term1}
- * - {@code \-term1} -- Searches for the term {@code -term1}.
- * - {@code term-1} -- Searches for the term {@code term-1}.
- * - {@code term\-1} -- Searches for the term {@code term-1}.
- *
- *
- * The '{@code *}' operator is a special case. On individual terms (not phrases) the last
- * character of a term that is '{@code *}' must be escaped; however, any '{@code *}' characters
- * before the last character do not need to be escaped:
- *
- * - {@code term1*} -- Searches for the prefix {@code term1}
- * - {@code term1\*} -- Searches for the term {@code term1*}
- * - {@code term*1} -- Searches for the term {@code term*1}
- * - {@code term\*1} -- Searches for the term {@code term*1}
- *
- *
- * Note that above examples consider the terms before text processing.
- */
-public class XSimpleQueryParser extends QueryBuilder {
-
- static {
- assert Version.LUCENE_46.onOrAfter(Lucene.VERSION) : "Lucene 4.7 adds SimpleQueryParser, remove me!";
- }
-
- /** Map of fields to query against with their weights */
- protected final Map weights;
- /** flags to the parser (to turn features on/off) */
- protected final int flags;
-
- /** Enables {@code AND} operator (+) */
- public static final int AND_OPERATOR = 1<<0;
- /** Enables {@code NOT} operator (-) */
- public static final int NOT_OPERATOR = 1<<1;
- /** Enables {@code OR} operator (|) */
- public static final int OR_OPERATOR = 1<<2;
- /** Enables {@code PREFIX} operator (*) */
- public static final int PREFIX_OPERATOR = 1<<3;
- /** Enables {@code PHRASE} operator (") */
- public static final int PHRASE_OPERATOR = 1<<4;
- /** Enables {@code PRECEDENCE} operators: {@code (} and {@code )} */
- public static final int PRECEDENCE_OPERATORS = 1<<5;
- /** Enables {@code ESCAPE} operator (\) */
- public static final int ESCAPE_OPERATOR = 1<<6;
- /** Enables {@code WHITESPACE} operators: ' ' '\n' '\r' '\t' */
- public static final int WHITESPACE_OPERATOR = 1<<7;
- /** Enables {@code FUZZY} operators: (~) on single terms */
- public static final int FUZZY_OPERATOR = 1<<8;
- /** Enables {@code NEAR} operators: (~) on phrases */
- public static final int NEAR_OPERATOR = 1<<9;
-
-
- private BooleanClause.Occur defaultOperator = BooleanClause.Occur.SHOULD;
-
- /** Creates a new parser searching over a single field. */
- public XSimpleQueryParser(Analyzer analyzer, String field) {
- this(analyzer, Collections.singletonMap(field, 1.0F));
- }
-
- /** Creates a new parser searching over multiple fields with different weights. */
- public XSimpleQueryParser(Analyzer analyzer, Map weights) {
- this(analyzer, weights, -1);
- }
-
- /** Creates a new parser with custom flags used to enable/disable certain features. */
- public XSimpleQueryParser(Analyzer analyzer, Map weights, int flags) {
- super(analyzer);
- this.weights = weights;
- this.flags = flags;
- }
-
- /** Parses the query text and returns parsed query (or null if empty) */
- public Query parse(String queryText) {
- char data[] = queryText.toCharArray();
- char buffer[] = new char[data.length];
-
- State state = new State(data, buffer, 0, data.length);
- parseSubQuery(state);
- return state.top;
- }
-
- private void parseSubQuery(State state) {
- while (state.index < state.length) {
- if (state.data[state.index] == '(' && (flags & PRECEDENCE_OPERATORS) != 0) {
- // the beginning of a subquery has been found
- consumeSubQuery(state);
- } else if (state.data[state.index] == ')' && (flags & PRECEDENCE_OPERATORS) != 0) {
- // this is an extraneous character so it is ignored
- ++state.index;
- } else if (state.data[state.index] == '"' && (flags & PHRASE_OPERATOR) != 0) {
- // the beginning of a phrase has been found
- consumePhrase(state);
- } else if (state.data[state.index] == '+' && (flags & AND_OPERATOR) != 0) {
- // an and operation has been explicitly set
- // if an operation has already been set this one is ignored
- // if a term (or phrase or subquery) has not been found yet the
- // operation is also ignored since there is no previous
- // term (or phrase or subquery) to and with
- if (state.currentOperation == null && state.top != null) {
- state.currentOperation = BooleanClause.Occur.MUST;
- }
-
- ++state.index;
- } else if (state.data[state.index] == '|' && (flags & OR_OPERATOR) != 0) {
- // an or operation has been explicitly set
- // if an operation has already been set this one is ignored
- // if a term (or phrase or subquery) has not been found yet the
- // operation is also ignored since there is no previous
- // term (or phrase or subquery) to or with
- if (state.currentOperation == null && state.top != null) {
- state.currentOperation = BooleanClause.Occur.SHOULD;
- }
-
- ++state.index;
- } else if (state.data[state.index] == '-' && (flags & NOT_OPERATOR) != 0) {
- // a not operator has been found, so increase the not count
- // two not operators in a row negate each other
- ++state.not;
- ++state.index;
-
- // continue so the not operator is not reset
- // before the next character is determined
- continue;
- } else if ((state.data[state.index] == ' '
- || state.data[state.index] == '\t'
- || state.data[state.index] == '\n'
- || state.data[state.index] == '\r') && (flags & WHITESPACE_OPERATOR) != 0) {
- // ignore any whitespace found as it may have already been
- // used a delimiter across a term (or phrase or subquery)
- // or is simply extraneous
- ++state.index;
- } else {
- // the beginning of a token has been found
- consumeToken(state);
- }
-
- // reset the not operator as even whitespace is not allowed when
- // specifying the not operation for a term (or phrase or subquery)
- state.not = 0;
- }
- }
-
- private void consumeSubQuery(State state) {
- assert (flags & PRECEDENCE_OPERATORS) != 0;
- int start = ++state.index;
- int precedence = 1;
- boolean escaped = false;
-
- while (state.index < state.length) {
- if (!escaped) {
- if (state.data[state.index] == '\\' && (flags & ESCAPE_OPERATOR) != 0) {
- // an escape character has been found so
- // whatever character is next will become
- // part of the subquery unless the escape
- // character is the last one in the data
- escaped = true;
- ++state.index;
-
- continue;
- } else if (state.data[state.index] == '(') {
- // increase the precedence as there is a
- // subquery in the current subquery
- ++precedence;
- } else if (state.data[state.index] == ')') {
- --precedence;
-
- if (precedence == 0) {
- // this should be the end of the subquery
- // all characters found will used for
- // creating the subquery
- break;
- }
- }
- }
-
- escaped = false;
- ++state.index;
- }
-
- if (state.index == state.length) {
- // a closing parenthesis was never found so the opening
- // parenthesis is considered extraneous and will be ignored
- state.index = start;
- } else if (state.index == start) {
- // a closing parenthesis was found immediately after the opening
- // parenthesis so the current operation is reset since it would
- // have been applied to this subquery
- state.currentOperation = null;
-
- ++state.index;
- } else {
- // a complete subquery has been found and is recursively parsed by
- // starting over with a new state object
- State subState = new State(state.data, state.buffer, start, state.index);
- parseSubQuery(subState);
- buildQueryTree(state, subState.top);
-
- ++state.index;
- }
- }
-
- private void consumePhrase(State state) {
- assert (flags & PHRASE_OPERATOR) != 0;
- int start = ++state.index;
- int copied = 0;
- boolean escaped = false;
- boolean hasSlop = false;
-
- while (state.index < state.length) {
- if (!escaped) {
- if (state.data[state.index] == '\\' && (flags & ESCAPE_OPERATOR) != 0) {
- // an escape character has been found so
- // whatever character is next will become
- // part of the phrase unless the escape
- // character is the last one in the data
- escaped = true;
- ++state.index;
-
- continue;
- } else if (state.data[state.index] == '"') {
- // if there are still characters after the closing ", check for a
- // tilde
- if (state.length > (state.index + 1) &&
- state.data[state.index+1] == '~' &&
- (flags & NEAR_OPERATOR) != 0) {
- state.index++;
- // check for characters after the tilde
- if (state.length > (state.index + 1)) {
- hasSlop = true;
- }
- break;
- } else {
- // this should be the end of the phrase
- // all characters found will used for
- // creating the phrase query
- break;
- }
- }
- }
-
- escaped = false;
- state.buffer[copied++] = state.data[state.index++];
- }
-
- if (state.index == state.length) {
- // a closing double quote was never found so the opening
- // double quote is considered extraneous and will be ignored
- state.index = start;
- } else if (state.index == start) {
- // a closing double quote was found immediately after the opening
- // double quote so the current operation is reset since it would
- // have been applied to this phrase
- state.currentOperation = null;
-
- ++state.index;
- } else {
- // a complete phrase has been found and is parsed through
- // through the analyzer from the given field
- String phrase = new String(state.buffer, 0, copied);
- Query branch;
- if (hasSlop) {
- branch = newPhraseQuery(phrase, parseFuzziness(state));
- } else {
- branch = newPhraseQuery(phrase, 0);
- }
- buildQueryTree(state, branch);
-
- ++state.index;
- }
- }
-
- private void consumeToken(State state) {
- int copied = 0;
- boolean escaped = false;
- boolean prefix = false;
- boolean fuzzy = false;
-
- while (state.index < state.length) {
- if (!escaped) {
- if (state.data[state.index] == '\\' && (flags & ESCAPE_OPERATOR) != 0) {
- // an escape character has been found so
- // whatever character is next will become
- // part of the term unless the escape
- // character is the last one in the data
- escaped = true;
- prefix = false;
- ++state.index;
-
- continue;
- } else if (tokenFinished(state)) {
- // this should be the end of the term
- // all characters found will used for
- // creating the term query
- break;
- } else if (copied > 0 && state.data[state.index] == '~' && (flags & FUZZY_OPERATOR) != 0) {
- fuzzy = true;
- break;
- }
-
- // wildcard tracks whether or not the last character
- // was a '*' operator that hasn't been escaped
- // there must be at least one valid character before
- // searching for a prefixed set of terms
- prefix = copied > 0 && state.data[state.index] == '*' && (flags & PREFIX_OPERATOR) != 0;
- }
-
- escaped = false;
- state.buffer[copied++] = state.data[state.index++];
- }
-
- if (copied > 0) {
- final Query branch;
-
- if (fuzzy && (flags & FUZZY_OPERATOR) != 0) {
- String token = new String(state.buffer, 0, copied);
- int fuzziness = parseFuzziness(state);
- // edit distance has a maximum, limit to the maximum supported
- fuzziness = Math.min(fuzziness, LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE);
- if (fuzziness == 0) {
- branch = newDefaultQuery(token);
- } else {
- branch = newFuzzyQuery(token, fuzziness);
- }
- } else if (prefix) {
- // if a term is found with a closing '*' it is considered to be a prefix query
- // and will have prefix added as an option
- String token = new String(state.buffer, 0, copied - 1);
- branch = newPrefixQuery(token);
- } else {
- // a standard term has been found so it will be run through
- // the entire analysis chain from the specified schema field
- String token = new String(state.buffer, 0, copied);
- branch = newDefaultQuery(token);
- }
-
- buildQueryTree(state, branch);
- }
- }
-
- // buildQueryTree should be called after a term, phrase, or subquery
- // is consumed to be added to our existing query tree
- // this method will only add to the existing tree if the branch contained in state is not null
- private void buildQueryTree(State state, Query branch) {
- if (branch != null) {
- // modify our branch to a BooleanQuery wrapper for not
- // this is necessary any time a term, phrase, or subquery is negated
- if (state.not % 2 == 1) {
- BooleanQuery nq = new BooleanQuery();
- nq.add(branch, BooleanClause.Occur.MUST_NOT);
- nq.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
- branch = nq;
- }
-
- // first term (or phrase or subquery) found and will begin our query tree
- if (state.top == null) {
- state.top = branch;
- } else {
- // more than one term (or phrase or subquery) found
- // set currentOperation to the default if no other operation is explicitly set
- if (state.currentOperation == null) {
- state.currentOperation = defaultOperator;
- }
-
- // operational change requiring a new parent node
- // this occurs if the previous operation is not the same as current operation
- // because the previous operation must be evaluated separately to preserve
- // the proper precedence and the current operation will take over as the top of the tree
- if (state.previousOperation != state.currentOperation) {
- BooleanQuery bq = new BooleanQuery();
- bq.add(state.top, state.currentOperation);
- state.top = bq;
- }
-
- // reset all of the state for reuse
- ((BooleanQuery)state.top).add(branch, state.currentOperation);
- state.previousOperation = state.currentOperation;
- }
-
- // reset the current operation as it was intended to be applied to
- // the incoming term (or phrase or subquery) even if branch was null
- // due to other possible errors
- state.currentOperation = null;
- }
- }
-
- /**
- * Helper parsing fuzziness from parsing state
- * @return slop/edit distance, 0 in the case of non-parsing slop/edit string
- */
- private int parseFuzziness(State state) {
- char slopText[] = new char[state.length];
- int slopLength = 0;
-
- if (state.data[state.index] == '~') {
- while (state.index < state.length) {
- state.index++;
- // it's possible that the ~ was at the end, so check after incrementing
- // to make sure we don't go out of bounds
- if (state.index < state.length) {
- if (tokenFinished(state)) {
- break;
- }
- slopText[slopLength] = state.data[state.index];
- slopLength++;
- }
- }
- int fuzziness = 0;
- try {
- fuzziness = Integer.parseInt(new String(slopText, 0, slopLength));
- } catch (NumberFormatException e) {
- // swallow number format exceptions parsing fuzziness
- }
- // negative -> 0
- if (fuzziness < 0) {
- fuzziness = 0;
- }
- return fuzziness;
- }
- return 0;
- }
-
- /**
- * Helper returning true if the state has reached the end of token.
- */
- private boolean tokenFinished(State state) {
- if ((state.data[state.index] == '"' && (flags & PHRASE_OPERATOR) != 0)
- || (state.data[state.index] == '|' && (flags & OR_OPERATOR) != 0)
- || (state.data[state.index] == '+' && (flags & AND_OPERATOR) != 0)
- || (state.data[state.index] == '(' && (flags & PRECEDENCE_OPERATORS) != 0)
- || (state.data[state.index] == ')' && (flags & PRECEDENCE_OPERATORS) != 0)
- || ((state.data[state.index] == ' '
- || state.data[state.index] == '\t'
- || state.data[state.index] == '\n'
- || state.data[state.index] == '\r') && (flags & WHITESPACE_OPERATOR) != 0)) {
- return true;
- }
- return false;
- }
-
- /**
- * Factory method to generate a standard query (no phrase or prefix operators).
- */
- protected Query newDefaultQuery(String text) {
- BooleanQuery bq = new BooleanQuery(true);
- for (Map.Entry entry : weights.entrySet()) {
- Query q = createBooleanQuery(entry.getKey(), text, defaultOperator);
- if (q != null) {
- q.setBoost(entry.getValue());
- bq.add(q, BooleanClause.Occur.SHOULD);
- }
- }
- return simplify(bq);
- }
-
- /**
- * Factory method to generate a fuzzy query.
- */
- protected Query newFuzzyQuery(String text, int fuzziness) {
- BooleanQuery bq = new BooleanQuery(true);
- for (Map.Entry entry : weights.entrySet()) {
- Query q = new FuzzyQuery(new Term(entry.getKey(), text), fuzziness);
- if (q != null) {
- q.setBoost(entry.getValue());
- bq.add(q, BooleanClause.Occur.SHOULD);
- }
- }
- return simplify(bq);
- }
-
- /**
- * Factory method to generate a phrase query with slop.
- */
- protected Query newPhraseQuery(String text, int slop) {
- BooleanQuery bq = new BooleanQuery(true);
- for (Map.Entry entry : weights.entrySet()) {
- Query q = createPhraseQuery(entry.getKey(), text, slop);
- if (q != null) {
- q.setBoost(entry.getValue());
- bq.add(q, BooleanClause.Occur.SHOULD);
- }
- }
- return simplify(bq);
- }
-
- /**
- * Factory method to generate a prefix query.
- */
- protected Query newPrefixQuery(String text) {
- BooleanQuery bq = new BooleanQuery(true);
- for (Map.Entry entry : weights.entrySet()) {
- PrefixQuery prefix = new PrefixQuery(new Term(entry.getKey(), text));
- prefix.setBoost(entry.getValue());
- bq.add(prefix, BooleanClause.Occur.SHOULD);
- }
- return simplify(bq);
- }
-
- /**
- * Helper to simplify boolean queries with 0 or 1 clause
- */
- protected Query simplify(BooleanQuery bq) {
- if (bq.clauses().isEmpty()) {
- return null;
- } else if (bq.clauses().size() == 1) {
- return bq.clauses().get(0).getQuery();
- } else {
- return bq;
- }
- }
-
- /**
- * Returns the implicit operator setting, which will be
- * either {@code SHOULD} or {@code MUST}.
- */
- public BooleanClause.Occur getDefaultOperator() {
- return defaultOperator;
- }
-
- /**
- * Sets the implicit operator setting, which must be
- * either {@code SHOULD} or {@code MUST}.
- */
- public void setDefaultOperator(BooleanClause.Occur operator) {
- if (operator != BooleanClause.Occur.SHOULD && operator != BooleanClause.Occur.MUST) {
- throw new IllegalArgumentException("invalid operator: only SHOULD or MUST are allowed");
- }
- this.defaultOperator = operator;
- }
-
- static class State {
- final char[] data; // the characters in the query string
- final char[] buffer; // a temporary buffer used to reduce necessary allocations
- int index;
- int length;
-
- BooleanClause.Occur currentOperation;
- BooleanClause.Occur previousOperation;
- int not;
-
- Query top;
-
- State(char[] data, char[] buffer, int index, int length) {
- this.data = data;
- this.buffer = buffer;
- this.index = index;
- this.length = length;
- }
- }
-}
diff --git a/src/main/java/org/apache/lucene/search/XReferenceManager.java b/src/main/java/org/apache/lucene/search/XReferenceManager.java
deleted file mode 100644
index 07fb066fa8c94..0000000000000
--- a/src/main/java/org/apache/lucene/search/XReferenceManager.java
+++ /dev/null
@@ -1,326 +0,0 @@
-package org.apache.lucene.search;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.store.AlreadyClosedException;
-import org.apache.lucene.util.Version;
-
-import java.io.Closeable;
-import java.io.IOException;
-import java.util.List;
-import java.util.concurrent.CopyOnWriteArrayList;
-import java.util.concurrent.locks.Lock;
-import java.util.concurrent.locks.ReentrantLock;
-
-/**
- * Utility class to safely share instances of a certain type across multiple
- * threads, while periodically refreshing them. This class ensures each
- * reference is closed only once all threads have finished using it. It is
- * recommended to consult the documentation of {@link org.apache.lucene.search.XReferenceManager}
- * implementations for their {@link #maybeRefresh()} semantics.
- *
- * @param <G>
- * the concrete type that will be {@link #acquire() acquired} and
- * {@link #release(Object) released}.
- *
- * @lucene.experimental
- */
-public abstract class XReferenceManager implements Closeable {
- static {
- assert Version.LUCENE_46 == org.elasticsearch.Version.CURRENT.luceneVersion : "Remove this once we are on LUCENE_47 - see LUCENE-5436";
- }
-
- private static final String REFERENCE_MANAGER_IS_CLOSED_MSG = "this ReferenceManager is closed";
-
- protected volatile G current;
-
- private final Lock refreshLock = new ReentrantLock();
-
- private final List refreshListeners = new CopyOnWriteArrayList();
-
- private void ensureOpen() {
- if (current == null) {
- throw new AlreadyClosedException(REFERENCE_MANAGER_IS_CLOSED_MSG);
- }
- }
-
- private synchronized void swapReference(G newReference) throws IOException {
- ensureOpen();
- final G oldReference = current;
- current = newReference;
- release(oldReference);
- }
-
- /**
- * Decrement reference counting on the given reference.
- * @throws java.io.IOException if reference decrement on the given resource failed.
- * */
- protected abstract void decRef(G reference) throws IOException;
-
- /**
- * Refresh the given reference if needed. Returns {@code null} if no refresh
- * was needed, otherwise a new refreshed reference.
- * @throws org.apache.lucene.store.AlreadyClosedException if the reference manager has been {@link #close() closed}.
- * @throws java.io.IOException if the refresh operation failed
- */
- protected abstract G refreshIfNeeded(G referenceToRefresh) throws IOException;
-
- /**
- * Try to increment reference counting on the given reference. Return true if
- * the operation was successful.
- * @throws org.apache.lucene.store.AlreadyClosedException if the reference manager has been {@link #close() closed}.
- */
- protected abstract boolean tryIncRef(G reference) throws IOException;
-
- /**
- * Obtain the current reference. You must match every call to acquire with one
- * call to {@link #release}; it's best to do so in a finally clause, and set
- * the reference to {@code null} to prevent accidental usage after it has been
- * released.
- * @throws org.apache.lucene.store.AlreadyClosedException if the reference manager has been {@link #close() closed}.
- */
- public final G acquire() throws IOException {
- G ref;
-
- do {
- if ((ref = current) == null) {
- throw new AlreadyClosedException(REFERENCE_MANAGER_IS_CLOSED_MSG);
- }
- if (tryIncRef(ref)) {
- return ref;
- }
- if (getRefCount(ref) == 0 && current == ref) {
- assert ref != null;
- /* if we can't increment the reader but we are
- still the current reference the RM is in a
- illegal states since we can't make any progress
- anymore. The reference is closed but the RM still
- holds on to it as the actual instance.
- This can only happen if somebody outside of the RM
- decrements the refcount without a corresponding increment
- since the RM assigns the new reference before counting down
- the reference. */
- throw new IllegalStateException("The managed reference has already closed - this is likely a bug when the reference count is modified outside of the ReferenceManager");
- }
- } while (true);
- }
-
- /**
- *
- * Closes this ReferenceManager to prevent future {@link #acquire() acquiring}. A
- * reference manager should be closed if the reference to the managed resource
- * should be disposed or the application using the {@link org.apache.lucene.search.XReferenceManager}
- * is shutting down. The managed resource might not be released immediately,
- * if the {@link org.apache.lucene.search.XReferenceManager} user is holding on to a previously
- * {@link #acquire() acquired} reference. The resource will be released once
- * when the last reference is {@link #release(Object) released}. Those
- * references can still be used as if the manager was still active.
- *
- *
- * Applications should not {@link #acquire() acquire} new references from this
- * manager once this method has been called. {@link #acquire() Acquiring} a
- * resource on a closed {@link org.apache.lucene.search.XReferenceManager} will throw an
- * {@link org.apache.lucene.store.AlreadyClosedException}.
- *
- *
- * @throws java.io.IOException
- * if the underlying reader of the current reference could not be closed
- */
- @Override
- public final synchronized void close() throws IOException {
- if (current != null) {
- // make sure we can call this more than once
- // closeable javadoc says:
- // if this is already closed then invoking this method has no effect.
- swapReference(null);
- afterClose();
- }
- }
-
- /**
- * Returns the current reference count of the given reference.
- */
- protected abstract int getRefCount(G reference);
-
- /**
- * Called after close(), so subclass can free any resources.
- * @throws java.io.IOException if the after close operation in a sub-class throws an {@link java.io.IOException}
- * */
- protected void afterClose() throws IOException {
- }
-
- private void doMaybeRefresh() throws IOException {
- // it's ok to call lock() here (blocking) because we're supposed to get here
- // from either maybeRefreh() or maybeRefreshBlocking(), after the lock has
- // already been obtained. Doing that protects us from an accidental bug
- // where this method will be called outside the scope of refreshLock.
- // Per ReentrantLock's javadoc, calling lock() by the same thread more than
- // once is ok, as long as unlock() is called a matching number of times.
- refreshLock.lock();
- boolean refreshed = false;
- try {
- final G reference = acquire();
- try {
- notifyRefreshListenersBefore();
- G newReference = refreshIfNeeded(reference);
- if (newReference != null) {
- assert newReference != reference : "refreshIfNeeded should return null if refresh wasn't needed";
- try {
- swapReference(newReference);
- refreshed = true;
- } finally {
- if (!refreshed) {
- release(newReference);
- }
- }
- }
- } finally {
- release(reference);
- notifyRefreshListenersRefreshed(refreshed);
- }
- afterMaybeRefresh();
- } finally {
- refreshLock.unlock();
- }
- }
-
- /**
- * You must call this (or {@link #maybeRefreshBlocking()}), periodically, if
- * you want that {@link #acquire()} will return refreshed instances.
- *
- *
- * Threads: it's fine for more than one thread to call this at once.
- * Only the first thread will attempt the refresh; subsequent threads will see
- * that another thread is already handling refresh and will return
- * immediately. Note that this means if another thread is already refreshing
- * then subsequent threads will return right away without waiting for the
- * refresh to complete.
- *
- *
- * If this method returns true it means the calling thread either refreshed or
- * that there were no changes to refresh. If it returns false it means another
- * thread is currently refreshing.
- *
- * @throws java.io.IOException if refreshing the resource causes an {@link java.io.IOException}
- * @throws org.apache.lucene.store.AlreadyClosedException if the reference manager has been {@link #close() closed}.
- */
- public final boolean maybeRefresh() throws IOException {
- ensureOpen();
-
- // Ensure only 1 thread does refresh at once; other threads just return immediately:
- final boolean doTryRefresh = refreshLock.tryLock();
- if (doTryRefresh) {
- try {
- doMaybeRefresh();
- } finally {
- refreshLock.unlock();
- }
- }
-
- return doTryRefresh;
- }
-
- /**
- * You must call this (or {@link #maybeRefresh()}), periodically, if you want
- * that {@link #acquire()} will return refreshed instances.
- *
- *
- * Threads: unlike {@link #maybeRefresh()}, if another thread is
- * currently refreshing, this method blocks until that thread completes. It is
- * useful if you want to guarantee that the next call to {@link #acquire()}
- * will return a refreshed instance. Otherwise, consider using the
- * non-blocking {@link #maybeRefresh()}.
- * @throws java.io.IOException if refreshing the resource causes an {@link java.io.IOException}
- * @throws org.apache.lucene.store.AlreadyClosedException if the reference manager has been {@link #close() closed}.
- */
- public final void maybeRefreshBlocking() throws IOException {
- ensureOpen();
-
- // Ensure only 1 thread does refresh at once
- refreshLock.lock();
- try {
- doMaybeRefresh();
- } finally {
- refreshLock.unlock();
- }
- }
-
- /** Called after a refresh was attempted, regardless of
- * whether a new reference was in fact created.
- * @throws java.io.IOException if a low level I/O exception occurs
- **/
- protected void afterMaybeRefresh() throws IOException {
- }
-
- /**
- * Release the reference previously obtained via {@link #acquire()}.
- *
- * NOTE: it's safe to call this after {@link #close()}.
- * @throws java.io.IOException if the release operation on the given resource throws an {@link java.io.IOException}
- */
- public final void release(G reference) throws IOException {
- assert reference != null;
- decRef(reference);
- }
-
- private void notifyRefreshListenersBefore() throws IOException {
- for (RefreshListener refreshListener : refreshListeners) {
- refreshListener.beforeRefresh();
- }
- }
-
- private void notifyRefreshListenersRefreshed(boolean didRefresh) throws IOException {
- for (RefreshListener refreshListener : refreshListeners) {
- refreshListener.afterRefresh(didRefresh);
- }
- }
-
- /**
- * Adds a listener, to be notified when a reference is refreshed/swapped.
- */
- public void addListener(RefreshListener listener) {
- if (listener == null) {
- throw new NullPointerException("Listener cannot be null");
- }
- refreshListeners.add(listener);
- }
-
- /**
- * Remove a listener added with {@link #addListener(RefreshListener)}.
- */
- public void removeListener(RefreshListener listener) {
- if (listener == null) {
- throw new NullPointerException("Listener cannot be null");
- }
- refreshListeners.remove(listener);
- }
-
- /** Use to receive notification when a refresh has
- * finished. See {@link #addListener}. */
- public interface RefreshListener {
-
- /** Called right before a refresh attempt starts. */
- void beforeRefresh() throws IOException;
-
- /** Called after the attempted refresh; if the refresh
- * did open a new reference then didRefresh will be true
- * and {@link #acquire()} is guaranteed to return the new
- * reference. */
- void afterRefresh(boolean didRefresh) throws IOException;
- }
-}
diff --git a/src/main/java/org/apache/lucene/search/XSearcherManager.java b/src/main/java/org/apache/lucene/search/XSearcherManager.java
deleted file mode 100644
index 36c74a0872847..0000000000000
--- a/src/main/java/org/apache/lucene/search/XSearcherManager.java
+++ /dev/null
@@ -1,177 +0,0 @@
-package org.apache.lucene.search;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.DirectoryReader;
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.util.Version;
-
-/**
- * Utility class to safely share {@link IndexSearcher} instances across multiple
- * threads, while periodically reopening. This class ensures each searcher is
- * closed only once all threads have finished using it.
- *
- * <p>
- * Use {@link #acquire} to obtain the current searcher, and {@link #release} to
- * release it, like this:
- *
- * <pre class="prettyprint">
- * IndexSearcher s = manager.acquire();
- * try {
- * // Do searching, doc retrieval, etc. with s
- * } finally {
- * manager.release(s);
- * }
- * // Do not use s after this!
- * s = null;
- * </pre>
- *
- * <p>
- * In addition you should periodically call {@link #maybeRefresh}. While it's
- * possible to call this just before running each query, this is discouraged
- * since it penalizes the unlucky queries that do the reopen. It's better to use
- * a separate background thread, that periodically calls maybeReopen. Finally,
- * be sure to call {@link #close} once you are done.
- *
- * @see SearcherFactory
- *
- * @lucene.experimental
- */
-public final class XSearcherManager extends XReferenceManager<IndexSearcher> {
-
- static {
- assert Version.LUCENE_46 == org.elasticsearch.Version.CURRENT.luceneVersion : "Remove this once we are on LUCENE_47 - see LUCENE-5436";
- }
-
- private final SearcherFactory searcherFactory;
-
- /**
- * Creates and returns a new XSearcherManager from the given
- * {@link IndexWriter}.
- *
- * @param writer
- * the IndexWriter to open the IndexReader from.
- * @param applyAllDeletes
- * If <code>true</code>, all buffered deletes will be applied (made
- * visible) in the {@link IndexSearcher} / {@link DirectoryReader}.
- * If <code>false</code>, the deletes may or may not be applied, but
- * remain buffered (in IndexWriter) so that they will be applied in
- * the future. Applying deletes can be costly, so if your app can
- * tolerate deleted documents being returned you might gain some
- * performance by passing <code>false</code>. See
- * {@link DirectoryReader#openIfChanged(DirectoryReader, IndexWriter, boolean)}.
- * @param searcherFactory
- * An optional {@link SearcherFactory}. Pass <code>null</code> if you
- * don't require the searcher to be warmed before going live or other
- * custom behavior.
- *
- * @throws IOException if there is a low-level I/O error
- */
- public XSearcherManager(IndexWriter writer, boolean applyAllDeletes, SearcherFactory searcherFactory) throws IOException {
- if (searcherFactory == null) {
- searcherFactory = new SearcherFactory();
- }
- this.searcherFactory = searcherFactory;
- current = getSearcher(searcherFactory, DirectoryReader.open(writer, applyAllDeletes));
- }
-
- /**
- * Creates and returns a new XSearcherManager from the given {@link Directory}.
- * @param dir the directory to open the DirectoryReader on.
- * @param searcherFactory An optional {@link SearcherFactory}. Pass
- * <code>null</code> if you don't require the searcher to be warmed
- * before going live or other custom behavior.
- *
- * @throws IOException if there is a low-level I/O error
- */
- public XSearcherManager(Directory dir, SearcherFactory searcherFactory) throws IOException {
- if (searcherFactory == null) {
- searcherFactory = new SearcherFactory();
- }
- this.searcherFactory = searcherFactory;
- current = getSearcher(searcherFactory, DirectoryReader.open(dir));
- }
-
- @Override
- protected void decRef(IndexSearcher reference) throws IOException {
- reference.getIndexReader().decRef();
- }
-
- @Override
- protected IndexSearcher refreshIfNeeded(IndexSearcher referenceToRefresh) throws IOException {
- final IndexReader r = referenceToRefresh.getIndexReader();
- assert r instanceof DirectoryReader: "searcher's IndexReader should be a DirectoryReader, but got " + r;
- final IndexReader newReader = DirectoryReader.openIfChanged((DirectoryReader) r);
- if (newReader == null) {
- return null;
- } else {
- return getSearcher(searcherFactory, newReader);
- }
- }
-
- @Override
- protected boolean tryIncRef(IndexSearcher reference) {
- return reference.getIndexReader().tryIncRef();
- }
-
- @Override
- protected int getRefCount(IndexSearcher reference) {
- return reference.getIndexReader().getRefCount();
- }
-
- /**
- * Returns <code>true</code> if no changes have occured since this searcher
- * ie. reader was opened, otherwise <code>false</code>.
- * @see DirectoryReader#isCurrent()
- */
- public boolean isSearcherCurrent() throws IOException {
- final IndexSearcher searcher = acquire();
- try {
- final IndexReader r = searcher.getIndexReader();
- assert r instanceof DirectoryReader: "searcher's IndexReader should be a DirectoryReader, but got " + r;
- return ((DirectoryReader) r).isCurrent();
- } finally {
- release(searcher);
- }
- }
-
- /** Expert: creates a searcher from the provided {@link
- * IndexReader} using the provided {@link
- * SearcherFactory}. NOTE: this decRefs incoming reader
- * on throwing an exception. */
- public static IndexSearcher getSearcher(SearcherFactory searcherFactory, IndexReader reader) throws IOException {
- boolean success = false;
- final IndexSearcher searcher;
- try {
- searcher = searcherFactory.newSearcher(reader);
- if (searcher.getIndexReader() != reader) {
- throw new IllegalStateException("SearcherFactory must wrap exactly the provided reader (got " + searcher.getIndexReader() + " but expected " + reader + ")");
- }
- success = true;
- } finally {
- if (!success) {
- reader.decRef();
- }
- }
- return searcher;
- }
-}
diff --git a/src/main/java/org/apache/lucene/search/suggest/analyzing/XAnalyzingSuggester.java b/src/main/java/org/apache/lucene/search/suggest/analyzing/XAnalyzingSuggester.java
index 6e8b0a1ac0fd2..4c80716d8567a 100644
--- a/src/main/java/org/apache/lucene/search/suggest/analyzing/XAnalyzingSuggester.java
+++ b/src/main/java/org/apache/lucene/search/suggest/analyzing/XAnalyzingSuggester.java
@@ -171,7 +171,10 @@ public class XAnalyzingSuggester extends Lookup {
/** Whether position holes should appear in the automaton. */
private boolean preservePositionIncrements;
- /**
+ /** Number of entries the lookup was built with */
+ private long count = 0;
+
+ /**
* Calls {@link #XAnalyzingSuggester(Analyzer,Analyzer,int,int,int,boolean,FST,boolean,int,int,int,int,int)
* AnalyzingSuggester(analyzer, analyzer, EXACT_FIRST |
* PRESERVE_SEP, 256, -1)}
@@ -419,6 +422,7 @@ public void build(InputIterator iterator) throws IOException {
TokenStreamToAutomaton ts2a = getTokenStreamToAutomaton();
boolean success = false;
+ count = 0;
byte buffer[] = new byte[8];
try {
ByteArrayDataOutput output = new ByteArrayDataOutput(buffer);
@@ -483,6 +487,7 @@ public void build(InputIterator iterator) throws IOException {
writer.write(buffer, 0, output.getPosition());
}
+ count++;
}
writer.close();
@@ -614,7 +619,12 @@ public boolean store(OutputStream output) throws IOException {
return true;
}
- @Override
+ @Override
+ public long getCount() {
+ return count;
+ }
+
+ @Override
public boolean load(InputStream input) throws IOException {
DataInput dataIn = new InputStreamDataInput(input);
try {
@@ -837,7 +847,29 @@ protected boolean acceptResult(IntsRef input, Pair output) {
}
}
- /** Returns all completion paths to initialize the search. */
+ @Override
+ public boolean store(DataOutput output) throws IOException {
+ output.writeVLong(count);
+ if (fst == null) {
+ return false;
+ }
+
+ fst.save(output);
+ output.writeVInt(maxAnalyzedPathsForOneInput);
+ output.writeByte((byte) (hasPayloads ? 1 : 0));
+ return true;
+ }
+
+ @Override
+ public boolean load(DataInput input) throws IOException {
+ count = input.readVLong();
+ this.fst = new FST<Pair<Long,BytesRef>>(input, new PairOutputs<Long,BytesRef>(PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton()));
+ maxAnalyzedPathsForOneInput = input.readVInt();
+ hasPayloads = input.readByte() == 1;
+ return true;
+ }
+
+ /** Returns all completion paths to initialize the search. */
protected List<FSTUtil.Path<Pair<Long,BytesRef>>> getFullPrefixPaths(List<FSTUtil.Path<Pair<Long,BytesRef>>> prefixPaths,
Automaton lookupAutomaton,
FST<Pair<Long,BytesRef>> fst)
diff --git a/src/main/java/org/elasticsearch/Version.java b/src/main/java/org/elasticsearch/Version.java
index 5db058fcab813..d4be117d5782b 100644
--- a/src/main/java/org/elasticsearch/Version.java
+++ b/src/main/java/org/elasticsearch/Version.java
@@ -156,9 +156,9 @@ public class Version implements Serializable {
public static final int V_1_0_0_ID = /*00*/1000099;
public static final Version V_1_0_0 = new Version(V_1_0_0_ID, false, org.apache.lucene.util.Version.LUCENE_46);
public static final int V_1_1_0_ID = /*00*/1010099;
- public static final Version V_1_1_0 = new Version(V_1_1_0_ID, true, org.apache.lucene.util.Version.LUCENE_46);
+ public static final Version V_1_1_0 = new Version(V_1_1_0_ID, true, org.apache.lucene.util.Version.LUCENE_47);
public static final int V_2_0_0_ID = /*00*/2000099;
- public static final Version V_2_0_0 = new Version(V_2_0_0_ID, true, org.apache.lucene.util.Version.LUCENE_46);
+ public static final Version V_2_0_0 = new Version(V_2_0_0_ID, true, org.apache.lucene.util.Version.LUCENE_47);
public static final Version CURRENT = V_2_0_0;
diff --git a/src/main/java/org/elasticsearch/common/lucene/Lucene.java b/src/main/java/org/elasticsearch/common/lucene/Lucene.java
index b7028b1dc70e2..29aac65960f4e 100644
--- a/src/main/java/org/elasticsearch/common/lucene/Lucene.java
+++ b/src/main/java/org/elasticsearch/common/lucene/Lucene.java
@@ -44,7 +44,7 @@
*/
public class Lucene {
- public static final Version VERSION = Version.LUCENE_46;
+ public static final Version VERSION = Version.LUCENE_47;
public static final Version ANALYZER_VERSION = VERSION;
public static final Version QUERYPARSER_VERSION = VERSION;
@@ -60,6 +60,9 @@ public static Version parseVersion(@Nullable String version, Version defaultVers
if (version == null) {
return defaultVersion;
}
+ if ("4.7".equals(version)) {
+ return VERSION.LUCENE_47;
+ }
if ("4.6".equals(version)) {
return VERSION.LUCENE_46;
}
diff --git a/src/main/java/org/elasticsearch/env/NodeEnvironment.java b/src/main/java/org/elasticsearch/env/NodeEnvironment.java
index db271400d6e3b..5125a9928def6 100644
--- a/src/main/java/org/elasticsearch/env/NodeEnvironment.java
+++ b/src/main/java/org/elasticsearch/env/NodeEnvironment.java
@@ -23,6 +23,7 @@
import com.google.common.primitives.Ints;
import org.apache.lucene.store.Lock;
import org.apache.lucene.store.NativeFSLockFactory;
+import org.apache.lucene.util.IOUtils;
import org.elasticsearch.ElasticsearchIllegalStateException;
import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.common.component.AbstractComponent;
@@ -87,11 +88,7 @@ public NodeEnvironment(Settings settings, Environment environment) {
// release all the ones that were obtained up until now
for (int i = 0; i < locks.length; i++) {
if (locks[i] != null) {
- try {
- locks[i].release();
- } catch (Exception e1) {
- // ignore
- }
+ IOUtils.closeWhileHandlingException(locks[i]);
}
locks[i] = null;
}
@@ -102,13 +99,7 @@ public NodeEnvironment(Settings settings, Environment environment) {
lastException = new IOException("failed to obtain lock on " + dir.getAbsolutePath(), e);
// release all the ones that were obtained up until now
for (int i = 0; i < locks.length; i++) {
- if (locks[i] != null) {
- try {
- locks[i].release();
- } catch (Exception e1) {
- // ignore
- }
- }
+ IOUtils.closeWhileHandlingException(locks[i]);
locks[i] = null;
}
break;
@@ -235,7 +226,7 @@ public void close() {
for (Lock lock : locks) {
try {
logger.trace("releasing lock [{}]", lock);
- lock.release();
+ lock.close();
} catch (IOException e) {
logger.trace("failed to release lock [{}]", e, lock);
}
diff --git a/src/main/java/org/elasticsearch/index/analysis/ASCIIFoldingTokenFilterFactory.java b/src/main/java/org/elasticsearch/index/analysis/ASCIIFoldingTokenFilterFactory.java
index c6b11d72c1879..81862618c2f20 100644
--- a/src/main/java/org/elasticsearch/index/analysis/ASCIIFoldingTokenFilterFactory.java
+++ b/src/main/java/org/elasticsearch/index/analysis/ASCIIFoldingTokenFilterFactory.java
@@ -20,7 +20,7 @@
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.miscellaneous.XASCIIFoldingFilter;
+import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
@@ -41,6 +41,6 @@ public ASCIIFoldingTokenFilterFactory(Index index, @IndexSettings Settings index
@Override
public TokenStream create(TokenStream tokenStream) {
- return new XASCIIFoldingFilter(tokenStream, preserveOriginal);
+ return new ASCIIFoldingFilter(tokenStream, preserveOriginal);
}
}
diff --git a/src/main/java/org/elasticsearch/index/analysis/ShingleTokenFilterFactory.java b/src/main/java/org/elasticsearch/index/analysis/ShingleTokenFilterFactory.java
index 7b812283a5874..b8c1e9e497610 100644
--- a/src/main/java/org/elasticsearch/index/analysis/ShingleTokenFilterFactory.java
+++ b/src/main/java/org/elasticsearch/index/analysis/ShingleTokenFilterFactory.java
@@ -41,7 +41,7 @@ public ShingleTokenFilterFactory(Index index, @IndexSettings Settings indexSetti
Integer minShingleSize = settings.getAsInt("min_shingle_size", ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE);
Boolean outputUnigrams = settings.getAsBoolean("output_unigrams", true);
Boolean outputUnigramsIfNoShingles = settings.getAsBoolean("output_unigrams_if_no_shingles", false);
- String tokenSeparator = settings.get("token_separator", ShingleFilter.TOKEN_SEPARATOR);
+ String tokenSeparator = settings.get("token_separator", ShingleFilter.DEFAULT_TOKEN_SEPARATOR);
factory = new Factory("shingle", minShingleSize, maxShingleSize, outputUnigrams, outputUnigramsIfNoShingles, tokenSeparator);
}
@@ -64,13 +64,13 @@ public static final class Factory implements TokenFilterFactory {
private final boolean outputUnigramsIfNoShingles;
private final String tokenSeparator;
-
+
private int minShingleSize;
private final String name;
public Factory(String name) {
- this(name, ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE, ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE, true, false, ShingleFilter.TOKEN_SEPARATOR);
+ this(name, ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE, ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE, true, false, ShingleFilter.DEFAULT_TOKEN_SEPARATOR);
}
Factory(String name, int minShingleSize, int maxShingleSize, boolean outputUnigrams, boolean outputUnigramsIfNoShingles, String tokenSeparator) {
diff --git a/src/main/java/org/elasticsearch/index/engine/internal/InternalEngine.java b/src/main/java/org/elasticsearch/index/engine/internal/InternalEngine.java
index 687ad6718e2cc..8e2cb3565f585 100644
--- a/src/main/java/org/elasticsearch/index/engine/internal/InternalEngine.java
+++ b/src/main/java/org/elasticsearch/index/engine/internal/InternalEngine.java
@@ -25,7 +25,7 @@
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SearcherFactory;
-import org.apache.lucene.search.XSearcherManager;
+import org.apache.lucene.search.SearcherManager;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.BytesRef;
@@ -120,7 +120,7 @@ public class InternalEngine extends AbstractIndexShardComponent implements Engin
private volatile IndexWriter indexWriter;
private final SearcherFactory searcherFactory = new SearchFactory();
- private volatile XSearcherManager searcherManager;
+ private volatile SearcherManager searcherManager;
private volatile boolean closed = false;
@@ -680,7 +680,7 @@ public void delete(DeleteByQuery delete) throws EngineException {
@Override
public final Searcher acquireSearcher(String source) throws EngineException {
- XSearcherManager manager = this.searcherManager;
+ SearcherManager manager = this.searcherManager;
if (manager == null) {
throw new EngineClosedException(shardId);
}
@@ -693,7 +693,7 @@ public final Searcher acquireSearcher(String source) throws EngineException {
}
}
- protected Searcher newSearcher(String source, IndexSearcher searcher, XSearcherManager manager) {
+ protected Searcher newSearcher(String source, IndexSearcher searcher, SearcherManager manager) {
return new EngineSearcher(source, searcher, manager);
}
@@ -797,7 +797,7 @@ public void flush(Flush flush) throws EngineException {
translog.newTranslog(translogId);
}
- XSearcherManager current = this.searcherManager;
+ SearcherManager current = this.searcherManager;
this.searcherManager = buildSearchManager(indexWriter);
try {
IOUtils.close(current);
@@ -1458,18 +1458,18 @@ public void onRefreshSettings(Settings settings) {
}
}
- private XSearcherManager buildSearchManager(IndexWriter indexWriter) throws IOException {
- return new XSearcherManager(indexWriter, true, searcherFactory);
+ private SearcherManager buildSearchManager(IndexWriter indexWriter) throws IOException {
+ return new SearcherManager(indexWriter, true, searcherFactory);
}
class EngineSearcher implements Searcher {
private final String source;
private final IndexSearcher searcher;
- private final XSearcherManager manager;
+ private final SearcherManager manager;
private final AtomicBoolean released;
- private EngineSearcher(String source, IndexSearcher searcher, XSearcherManager manager) {
+ private EngineSearcher(String source, IndexSearcher searcher, SearcherManager manager) {
this.source = source;
this.searcher = searcher;
this.manager = manager;
diff --git a/src/main/java/org/elasticsearch/index/fielddata/fieldcomparator/BytesRefOrdValComparator.java b/src/main/java/org/elasticsearch/index/fielddata/fieldcomparator/BytesRefOrdValComparator.java
index a5aa0860f53c7..d3028ff6acc57 100644
--- a/src/main/java/org/elasticsearch/index/fielddata/fieldcomparator/BytesRefOrdValComparator.java
+++ b/src/main/java/org/elasticsearch/index/fielddata/fieldcomparator/BytesRefOrdValComparator.java
@@ -121,25 +121,26 @@ public int compareBottom(int doc) {
}
@Override
- public int compareBottomMissing() {
+ public int compareTop(int doc) throws IOException {
throw new UnsupportedOperationException();
}
@Override
- public void copy(int slot, int doc) {
+ public int compareBottomMissing() {
throw new UnsupportedOperationException();
}
@Override
- public void missing(int slot) {
+ public void copy(int slot, int doc) {
throw new UnsupportedOperationException();
}
@Override
- public int compareDocToValue(int doc, BytesRef value) {
+ public void missing(int slot) {
throw new UnsupportedOperationException();
}
+
class PerSegmentComparator extends NestedWrappableComparator {
final Ordinals.Docs readerOrds;
final BytesValues.WithOrdinals termsIndex;
@@ -167,6 +168,11 @@ public void setBottom(final int bottom) {
BytesRefOrdValComparator.this.setBottom(bottom);
}
+ @Override
+ public void setTopValue(BytesRef value) {
+ // NOCOMMIT -- implement
+ }
+
@Override
public BytesRef value(int slot) {
return BytesRefOrdValComparator.this.value(slot);
@@ -185,12 +191,13 @@ public int compareValues(BytesRef val1, BytesRef val2) {
return val1.compareTo(val2);
}
- @Override
- public int compareDocToValue(int doc, BytesRef value) {
- final long ord = getOrd(doc);
- final BytesRef docValue = ord == Ordinals.MISSING_ORDINAL ? missingValue : termsIndex.getValueByOrd(ord);
- return compareValues(docValue, value);
- }
+ // NOCOMMIT - remove
+// @Override
+// public int compareDocToValue(int doc, BytesRef value) {
+// final long ord = getOrd(doc);
+// final BytesRef docValue = ord == Ordinals.MISSING_ORDINAL ? missingValue : termsIndex.getValueByOrd(ord);
+// return compareValues(docValue, value);
+// }
protected long getOrd(int doc) {
return readerOrds.getOrd(doc);
@@ -204,6 +211,12 @@ public int compareBottom(int doc) {
return LongValuesComparator.compare(bottomOrd, comparableOrd);
}
+ @Override
+ public int compareTop(int doc) throws IOException {
+ // NOCOMMIT - implement
+ return 0;
+ }
+
@Override
public int compareBottomMissing() {
assert bottomSlot != -1;
@@ -326,6 +339,11 @@ public void setBottom(final int bottom) {
readerGen[bottomSlot] = currentReaderGen;
}
+ @Override
+ public void setTopValue(BytesRef value) {
+ // NOCOMMIT
+ }
+
@Override
public BytesRef value(int slot) {
return values[slot];
diff --git a/src/main/java/org/elasticsearch/index/fielddata/fieldcomparator/BytesRefValComparator.java b/src/main/java/org/elasticsearch/index/fielddata/fieldcomparator/BytesRefValComparator.java
index 67b4d7023a2f4..3aef3494d381f 100644
--- a/src/main/java/org/elasticsearch/index/fielddata/fieldcomparator/BytesRefValComparator.java
+++ b/src/main/java/org/elasticsearch/index/fielddata/fieldcomparator/BytesRefValComparator.java
@@ -41,6 +41,7 @@ public final class BytesRefValComparator extends NestedWrappableComparator indexFieldData, int numHits, SortMode sortMode, BytesRef missingValue) {
@@ -63,6 +64,11 @@ public int compareBottom(int doc) throws IOException {
return compareValues(bottom, val2);
}
+ @Override
+ public int compareTop(int doc) throws IOException {
+ return top.compareTo(sortMode.getRelevantValue(docTerms, doc, missingValue));
+ }
+
@Override
public void copy(int slot, int doc) throws IOException {
BytesRef relevantValue = sortMode.getRelevantValue(docTerms, doc, missingValue);
@@ -87,6 +93,11 @@ public void setBottom(final int bottom) {
this.bottom = values[bottom];
}
+ @Override
+ public void setTopValue(BytesRef top) {
+ this.top = top;
+ }
+
@Override
public BytesRef value(int slot) {
return values[slot];
@@ -105,11 +116,6 @@ public int compareValues(BytesRef val1, BytesRef val2) {
return val1.compareTo(val2);
}
- @Override
- public int compareDocToValue(int doc, BytesRef value) {
- return sortMode.getRelevantValue(docTerms, doc, missingValue).compareTo(value);
- }
-
@Override
public void missing(int slot) {
values[slot] = missingValue;
diff --git a/src/main/java/org/elasticsearch/index/fielddata/fieldcomparator/DoubleScriptDataComparator.java b/src/main/java/org/elasticsearch/index/fielddata/fieldcomparator/DoubleScriptDataComparator.java
index 07f0dd4301504..bcf964f935dab 100644
--- a/src/main/java/org/elasticsearch/index/fielddata/fieldcomparator/DoubleScriptDataComparator.java
+++ b/src/main/java/org/elasticsearch/index/fielddata/fieldcomparator/DoubleScriptDataComparator.java
@@ -105,10 +105,10 @@ public int compareBottom(int doc) {
}
@Override
- public int compareDocToValue(int doc, Double val2) throws IOException {
+ public int compareTop(int doc) throws IOException {
script.setNextDocId(doc);
- double val1 = script.runAsDouble();
- return Double.compare(val1, val2);
+ double docValue = script.runAsDouble();
+ return Double.compare(top, docValue);
}
@Override
diff --git a/src/main/java/org/elasticsearch/index/fielddata/fieldcomparator/DoubleValuesComparatorBase.java b/src/main/java/org/elasticsearch/index/fielddata/fieldcomparator/DoubleValuesComparatorBase.java
index 366778d8bc034..fca4655c5f45e 100644
--- a/src/main/java/org/elasticsearch/index/fielddata/fieldcomparator/DoubleValuesComparatorBase.java
+++ b/src/main/java/org/elasticsearch/index/fielddata/fieldcomparator/DoubleValuesComparatorBase.java
@@ -46,10 +46,8 @@ public final int compareBottom(int doc) throws IOException {
}
@Override
- public final int compareDocToValue(int doc, T valueObj) throws IOException {
- final double value = valueObj.doubleValue();
- final double docValue = sortMode.getRelevantValue(readerValues, doc, missingValue);
- return compare(docValue, value);
+ public int compareTop(int doc) throws IOException {
+ return compare(top.doubleValue(), sortMode.getRelevantValue(readerValues, doc, missingValue));
}
@Override
diff --git a/src/main/java/org/elasticsearch/index/fielddata/fieldcomparator/GeoDistanceComparator.java b/src/main/java/org/elasticsearch/index/fielddata/fieldcomparator/GeoDistanceComparator.java
index 510d7d063d55a..948fd4f0b048d 100644
--- a/src/main/java/org/elasticsearch/index/fielddata/fieldcomparator/GeoDistanceComparator.java
+++ b/src/main/java/org/elasticsearch/index/fielddata/fieldcomparator/GeoDistanceComparator.java
@@ -82,9 +82,9 @@ public int compareBottom(int doc) {
}
@Override
- public int compareDocToValue(int doc, Double distance2) throws IOException {
- double distance1 = geoDistanceValues.computeDistance(doc);
- return Double.compare(distance1, distance2);
+ public int compareTop(int doc) throws IOException {
+ double docValue = geoDistanceValues.computeDistance(doc);
+ return Double.compare(top, docValue);
}
@Override
diff --git a/src/main/java/org/elasticsearch/index/fielddata/fieldcomparator/LongValuesComparatorBase.java b/src/main/java/org/elasticsearch/index/fielddata/fieldcomparator/LongValuesComparatorBase.java
index c68ec2817e87e..b57733b2a95d0 100644
--- a/src/main/java/org/elasticsearch/index/fielddata/fieldcomparator/LongValuesComparatorBase.java
+++ b/src/main/java/org/elasticsearch/index/fielddata/fieldcomparator/LongValuesComparatorBase.java
@@ -47,10 +47,8 @@ public final int compareBottom(int doc) throws IOException {
}
@Override
- public final int compareDocToValue(int doc, T valueObj) throws IOException {
- final long value = valueObj.longValue();
- long docValue = sortMode.getRelevantValue(readerValues, doc, missingValue);
- return compare(docValue, value);
+ public int compareTop(int doc) throws IOException {
+ return compare(top.longValue(), sortMode.getRelevantValue(readerValues, doc, missingValue));
}
static final int compare(long left, long right) {
diff --git a/src/main/java/org/elasticsearch/index/fielddata/fieldcomparator/NumberComparatorBase.java b/src/main/java/org/elasticsearch/index/fielddata/fieldcomparator/NumberComparatorBase.java
index e4c66aba667f6..5315b797d91d9 100644
--- a/src/main/java/org/elasticsearch/index/fielddata/fieldcomparator/NumberComparatorBase.java
+++ b/src/main/java/org/elasticsearch/index/fielddata/fieldcomparator/NumberComparatorBase.java
@@ -25,6 +25,7 @@
// This is right now only used for sorting number based fields inside nested objects
public abstract class NumberComparatorBase<T> extends NestedWrappableComparator<T> {
+ protected T top;
/**
* Adds numeric value at the specified doc to the specified slot.
*
@@ -41,4 +42,9 @@ public abstract class NumberComparatorBase extends NestedWrappableComparator<
*/
public abstract void divide(int slot, int divisor);
+ @Override
+ public void setTopValue(T top) {
+ this.top = top;
+ }
+
}
diff --git a/src/main/java/org/elasticsearch/index/fielddata/fieldcomparator/StringScriptDataComparator.java b/src/main/java/org/elasticsearch/index/fielddata/fieldcomparator/StringScriptDataComparator.java
index 61332499910ae..dd8965b284834 100644
--- a/src/main/java/org/elasticsearch/index/fielddata/fieldcomparator/StringScriptDataComparator.java
+++ b/src/main/java/org/elasticsearch/index/fielddata/fieldcomparator/StringScriptDataComparator.java
@@ -34,6 +34,8 @@
*/
public class StringScriptDataComparator extends FieldComparator<BytesRef> {
+ private BytesRef top;
+
public static IndexFieldData.XFieldComparatorSource comparatorSource(SearchScript script) {
return new InnerSource(script);
}
@@ -110,12 +112,12 @@ public int compareBottom(int doc) {
}
@Override
- public int compareDocToValue(int doc, BytesRef val2) throws IOException {
+ public int compareTop(int doc) throws IOException {
script.setNextDocId(doc);
setSpare(doc);
- return spare.compareTo(val2);
+ return top.compareTo(spare);
}
-
+
private void setSpare(int doc) {
if (spareDoc == doc) {
return;
@@ -140,6 +142,11 @@ public void setBottom(final int bottom) {
this.bottom = values[bottom];
}
+ @Override
+ public void setTopValue(BytesRef top) {
+ this.top = top;
+ }
+
@Override
public BytesRef value(int slot) {
return values[slot];
diff --git a/src/main/java/org/elasticsearch/index/gateway/fs/FsIndexShardGateway.java b/src/main/java/org/elasticsearch/index/gateway/fs/FsIndexShardGateway.java
index 03eb534d3ce74..2fc3888292d24 100644
--- a/src/main/java/org/elasticsearch/index/gateway/fs/FsIndexShardGateway.java
+++ b/src/main/java/org/elasticsearch/index/gateway/fs/FsIndexShardGateway.java
@@ -80,7 +80,7 @@ public FsSnapshotLock(Lock lock) {
@Override
public void release() {
try {
- lock.release();
+ lock.close();
} catch (IOException e) {
logger.warn("failed to release snapshot lock [{}]", e, lock);
}
diff --git a/src/main/java/org/elasticsearch/index/query/SimpleQueryParser.java b/src/main/java/org/elasticsearch/index/query/SimpleQueryParser.java
index fa45fc85037e8..fa04be2917e91 100644
--- a/src/main/java/org/elasticsearch/index/query/SimpleQueryParser.java
+++ b/src/main/java/org/elasticsearch/index/query/SimpleQueryParser.java
@@ -19,24 +19,16 @@
package org.elasticsearch.index.query;
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.queryparser.XSimpleQueryParser;
import org.apache.lucene.search.Query;
-import org.elasticsearch.ElasticsearchIllegalStateException;
-import org.elasticsearch.index.mapper.FieldMapper;
-import org.elasticsearch.index.mapper.MapperService;
-import java.io.IOException;
-import java.util.Collections;
import java.util.Locale;
import java.util.Map;
-import static org.elasticsearch.index.query.support.QueryParsers.wrapSmartNameQuery;
-
/**
* Wrapper class for Lucene's SimpleQueryParser that allows us to redefine
* different types of queries.
*/
-public class SimpleQueryParser extends XSimpleQueryParser {
+public class SimpleQueryParser extends org.apache.lucene.queryparser.simple.SimpleQueryParser {
private final boolean lowercaseExpandedTerms;
diff --git a/src/main/java/org/elasticsearch/index/query/SimpleQueryStringFlag.java b/src/main/java/org/elasticsearch/index/query/SimpleQueryStringFlag.java
index fdebfd4a0c264..54c6291951d76 100644
--- a/src/main/java/org/elasticsearch/index/query/SimpleQueryStringFlag.java
+++ b/src/main/java/org/elasticsearch/index/query/SimpleQueryStringFlag.java
@@ -18,7 +18,6 @@
*/
package org.elasticsearch.index.query;
-import org.apache.lucene.queryparser.XSimpleQueryParser;
import org.elasticsearch.ElasticsearchIllegalArgumentException;
import org.elasticsearch.common.Strings;
@@ -30,18 +29,18 @@
public enum SimpleQueryStringFlag {
ALL(-1),
NONE(0),
- AND(XSimpleQueryParser.AND_OPERATOR),
- NOT(XSimpleQueryParser.NOT_OPERATOR),
- OR(XSimpleQueryParser.OR_OPERATOR),
- PREFIX(XSimpleQueryParser.PREFIX_OPERATOR),
- PHRASE(XSimpleQueryParser.PHRASE_OPERATOR),
- PRECEDENCE(XSimpleQueryParser.PRECEDENCE_OPERATORS),
- ESCAPE(XSimpleQueryParser.ESCAPE_OPERATOR),
- WHITESPACE(XSimpleQueryParser.WHITESPACE_OPERATOR),
- FUZZY(XSimpleQueryParser.FUZZY_OPERATOR),
+ AND(SimpleQueryParser.AND_OPERATOR),
+ NOT(SimpleQueryParser.NOT_OPERATOR),
+ OR(SimpleQueryParser.OR_OPERATOR),
+ PREFIX(SimpleQueryParser.PREFIX_OPERATOR),
+ PHRASE(SimpleQueryParser.PHRASE_OPERATOR),
+ PRECEDENCE(SimpleQueryParser.PRECEDENCE_OPERATORS),
+ ESCAPE(SimpleQueryParser.ESCAPE_OPERATOR),
+ WHITESPACE(SimpleQueryParser.WHITESPACE_OPERATOR),
+ FUZZY(SimpleQueryParser.FUZZY_OPERATOR),
// NEAR and SLOP are synonymous, since "slop" is a more familiar term than "near"
- NEAR(XSimpleQueryParser.NEAR_OPERATOR),
- SLOP(XSimpleQueryParser.NEAR_OPERATOR);
+ NEAR(SimpleQueryParser.NEAR_OPERATOR),
+ SLOP(SimpleQueryParser.NEAR_OPERATOR);
final int value;
diff --git a/src/main/java/org/elasticsearch/index/query/SimpleQueryStringParser.java b/src/main/java/org/elasticsearch/index/query/SimpleQueryStringParser.java
index 0f172ae01b38e..4f2e45a364301 100644
--- a/src/main/java/org/elasticsearch/index/query/SimpleQueryStringParser.java
+++ b/src/main/java/org/elasticsearch/index/query/SimpleQueryStringParser.java
@@ -20,7 +20,6 @@
package org.elasticsearch.index.query;
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.queryparser.XSimpleQueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.Query;
import org.elasticsearch.common.Strings;
diff --git a/src/main/java/org/elasticsearch/index/search/nested/NestedFieldComparatorSource.java b/src/main/java/org/elasticsearch/index/search/nested/NestedFieldComparatorSource.java
index 425d8a741beff..19da2f1539f49 100644
--- a/src/main/java/org/elasticsearch/index/search/nested/NestedFieldComparatorSource.java
+++ b/src/main/java/org/elasticsearch/index/search/nested/NestedFieldComparatorSource.java
@@ -23,6 +23,7 @@
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.SortField;
+import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.elasticsearch.ElasticsearchIllegalArgumentException;
import org.elasticsearch.common.lucene.docset.DocIdSets;
@@ -135,8 +136,13 @@ public final Object value(int slot) {
}
@Override
- public final int compareDocToValue(int rootDoc, Object value) throws IOException {
- throw new UnsupportedOperationException("compareDocToValue() not used for sorting in ES");
+ public void setTopValue(Object top) {
+ throw new UnsupportedOperationException("setTopValue() not used for sorting in ES");
+ }
+
+ @Override
+ public int compareTop(int doc) throws IOException {
+ throw new UnsupportedOperationException("compareTop() not used for sorting in ES");
}
final static class Lowest extends NestedFieldComparator {
diff --git a/src/test/java/org/elasticsearch/index/analysis/ShingleTokenFilterFactoryTests.java b/src/test/java/org/elasticsearch/index/analysis/ShingleTokenFilterFactoryTests.java
index 6cc06be6b4b1c..701caded99e7a 100644
--- a/src/test/java/org/elasticsearch/index/analysis/ShingleTokenFilterFactoryTests.java
+++ b/src/test/java/org/elasticsearch/index/analysis/ShingleTokenFilterFactoryTests.java
@@ -21,7 +21,9 @@
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope;
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope.Scope;
+import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.elasticsearch.test.ElasticsearchTokenStreamTestCase;
import org.junit.Test;
@@ -68,4 +70,14 @@ public void testInverseMappingNoShingles() throws IOException {
assertTokenStreamContents(tokenFilter.create(tokenizer), expected);
}
+ @Test
+ public void testFillerToken() throws IOException {
+ AnalysisService analysisService = AnalysisTestsHelper.createAnalysisServiceFromClassPath(RESOURCE);
+ TokenFilterFactory tokenFilter = analysisService.tokenFilter("shingle_filler");
+ String source = "simon the sorcerer";
+ String[] expected = new String[]{"simon FILLER sorcerer"};
+ TokenStream tokenizer = new StopFilter(TEST_VERSION_CURRENT, new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(source)), StopFilter.makeStopSet(TEST_VERSION_CURRENT, "the"));
+ assertTokenStreamContents(tokenFilter.create(tokenizer), expected);
+ }
+
}
diff --git a/src/test/java/org/elasticsearch/index/analysis/shingle_analysis.json b/src/test/java/org/elasticsearch/index/analysis/shingle_analysis.json
index c469a4a4dd03e..33c09fe8dbd9f 100644
--- a/src/test/java/org/elasticsearch/index/analysis/shingle_analysis.json
+++ b/src/test/java/org/elasticsearch/index/analysis/shingle_analysis.json
@@ -9,6 +9,13 @@
"output_unigrams" : false,
"output_unigrams_if_no_shingles" : true,
"token_separator" : "_"
+ },
+ "shingle_filler":{
+ "type":"shingle",
+ "max_shingle_size" : 3,
+ "min_shingle_size" : 2,
+ "output_unigrams" : false,
+ "filler_token" : "FILLER"
}
}
}
diff --git a/src/test/java/org/elasticsearch/indices/warmer/SimpleIndicesWarmerTests.java b/src/test/java/org/elasticsearch/indices/warmer/SimpleIndicesWarmerTests.java
index c8ca7a87d1362..f852801fbfc3a 100644
--- a/src/test/java/org/elasticsearch/indices/warmer/SimpleIndicesWarmerTests.java
+++ b/src/test/java/org/elasticsearch/indices/warmer/SimpleIndicesWarmerTests.java
@@ -21,7 +21,6 @@
import com.carrotsearch.hppc.cursors.ObjectObjectCursor;
import com.google.common.collect.ImmutableList;
-import org.elasticsearch.Version;
import org.elasticsearch.action.admin.indices.segments.IndexSegments;
import org.elasticsearch.action.admin.indices.segments.IndexShardSegments;
import org.elasticsearch.action.admin.indices.segments.IndicesSegmentResponse;
@@ -43,9 +42,10 @@
import org.elasticsearch.search.warmer.IndexWarmersMetaData;
import org.elasticsearch.test.ElasticsearchIntegrationTest;
import org.hamcrest.Matchers;
-import org.junit.Ignore;
import org.junit.Test;
+import java.util.Locale;
+
import static org.hamcrest.Matchers.*;
/**
@@ -267,13 +267,13 @@ private long getWarmerRuns() {
}
private long getSegmentsMemoryUsage(String idx) {
- IndicesSegmentResponse response = client().admin().indices().segments(Requests.indicesSegmentsRequest("idx")).actionGet();
+ IndicesSegmentResponse response = client().admin().indices().segments(Requests.indicesSegmentsRequest(idx)).actionGet();
IndexSegments indicesSegments = response.getIndices().get(idx);
long total = 0;
for (IndexShardSegments indexShardSegments : indicesSegments) {
for (ShardSegments shardSegments : indexShardSegments) {
for (Segment segment : shardSegments) {
- System.out.println("+=" + segment.memoryInBytes + " " + indexShardSegments.getShardId() + " " + shardSegments.getIndex());
+ logger.debug("+=" + segment.memoryInBytes + " " + indexShardSegments.getShardId() + " " + shardSegments.getIndex());
total += segment.memoryInBytes;
}
}
@@ -302,6 +302,7 @@ boolean isLazy() {
@Override
void createIndex(String indexName, String type, String fieldName) throws Exception {
client().admin().indices().prepareCreate(indexName).setSettings(ImmutableSettings.builder().put(SINGLE_SHARD_NO_REPLICA).put(SearchService.NORMS_LOADING_KEY, Loading.LAZY_VALUE)).addMapping(type, JsonXContent.contentBuilder()
+ .startObject()
.startObject(type)
.startObject("properties")
.startObject(fieldName)
@@ -312,6 +313,7 @@ void createIndex(String indexName, String type, String fieldName) throws Excepti
.endObject()
.endObject()
.endObject()
+ .endObject()
).execute().actionGet();
}
@Override
@@ -325,27 +327,21 @@ boolean isLazy() {
return true;
}
}
-
- static {
- assertTrue("remove me when LUCENE-5373 is fixed", Version.CURRENT.luceneVersion == org.apache.lucene.util.Version.LUCENE_46);
- }
-
- @Ignore("enable me when LUCENE-5373 is fixed, see assertion above")
public void testEagerLoading() throws Exception {
for (LoadingMethod method : LoadingMethod.values()) {
- System.out.println("METHOD " + method);
- method.createIndex("idx", "t", "foo");
- client().prepareIndex("idx", "t", "1").setSource("foo", "bar").setRefresh(true).execute().actionGet();
- long memoryUsage0 = getSegmentsMemoryUsage("idx");
+ logger.debug("METHOD " + method);
+ String indexName = method.name().toLowerCase(Locale.ROOT);
+ method.createIndex(indexName, "t", "foo");
+ client().prepareIndex(indexName, "t", "1").setSource("foo", "bar").setRefresh(true).execute().actionGet();
+ long memoryUsage0 = getSegmentsMemoryUsage(indexName);
// queries load norms if they were not loaded before
- client().prepareSearch("idx").setQuery(QueryBuilders.matchQuery("foo", "bar")).execute().actionGet();
- long memoryUsage1 = getSegmentsMemoryUsage("idx");
+ client().prepareSearch(indexName).setQuery(QueryBuilders.matchQuery("foo", "bar")).execute().actionGet();
+ long memoryUsage1 = getSegmentsMemoryUsage(indexName);
if (method.isLazy()) {
assertThat(memoryUsage1, greaterThan(memoryUsage0));
} else {
assertThat(memoryUsage1, equalTo(memoryUsage0));
}
- wipeIndices("idx");
}
}
diff --git a/src/test/java/org/elasticsearch/test/engine/MockInternalEngine.java b/src/test/java/org/elasticsearch/test/engine/MockInternalEngine.java
index f82cebacf729d..d4d6176f8be45 100644
--- a/src/test/java/org/elasticsearch/test/engine/MockInternalEngine.java
+++ b/src/test/java/org/elasticsearch/test/engine/MockInternalEngine.java
@@ -19,10 +19,13 @@
package org.elasticsearch.test.engine;
-import org.apache.lucene.index.*;
+import org.apache.lucene.index.AssertingDirectoryReader;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.FilterDirectoryReader;
+import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.AssertingIndexSearcher;
import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.XSearcherManager;
+import org.apache.lucene.search.SearcherManager;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.inject.Inject;
@@ -94,7 +97,7 @@ public void close() throws ElasticsearchException {
}
@Override
- protected Searcher newSearcher(String source, IndexSearcher searcher, XSearcherManager manager) throws EngineException {
+ protected Searcher newSearcher(String source, IndexSearcher searcher, SearcherManager manager) throws EngineException {
IndexReader reader = searcher.getIndexReader();
IndexReader wrappedReader = reader;