diff --git a/llamafile/highlight_js.cpp b/llamafile/highlight_js.cpp index 7d40a477ba..9c8a2764c5 100644 --- a/llamafile/highlight_js.cpp +++ b/llamafile/highlight_js.cpp @@ -34,6 +34,8 @@ enum { TICK_BACKSLASH, REGEX, REGEX_BACKSLASH, + REGEX_SQUARE, + REGEX_SQUARE_BACKSLASH, }; enum { @@ -141,6 +143,8 @@ void HighlightJs::feed(std::string *r, std::string_view input) { *r += c; if (c == '\\') { t_ = REGEX_BACKSLASH; + } else if (c == '[') { + t_ = REGEX_SQUARE; } else { t_ = REGEX; } @@ -227,6 +231,8 @@ void HighlightJs::feed(std::string *r, std::string_view input) { t_ = NORMAL; } else if (c == '\\') { t_ = REGEX_BACKSLASH; + } else if (c == '[') { + t_ = REGEX_SQUARE; } break; @@ -235,6 +241,21 @@ void HighlightJs::feed(std::string *r, std::string_view input) { t_ = REGEX; break; + case REGEX_SQUARE: + // because /[/]/g is valid code + *r += c; + if (c == '\\') { + t_ = REGEX_SQUARE_BACKSLASH; + } else if (c == ']') { + t_ = REGEX; + } + break; + + case REGEX_SQUARE_BACKSLASH: + *r += c; + t_ = REGEX_SQUARE; + break; + default: __builtin_unreachable(); } @@ -279,6 +300,8 @@ void HighlightJs::flush(std::string *r) { case SLASH_STAR_STAR: case REGEX: case REGEX_BACKSLASH: + case REGEX_SQUARE: + case REGEX_SQUARE_BACKSLASH: *r += HI_RESET; break; default: diff --git a/llamafile/is_keyword_js.c b/llamafile/is_keyword_js.c index 2cf4a73633..c4d8c264c0 100644 --- a/llamafile/is_keyword_js.c +++ b/llamafile/is_keyword_js.c @@ -33,12 +33,12 @@ #include -#define TOTAL_KEYWORDS 36 +#define TOTAL_KEYWORDS 53 #define MIN_WORD_LENGTH 2 -#define MAX_WORD_LENGTH 10 +#define MAX_WORD_LENGTH 12 #define MIN_HASH_VALUE 2 -#define MAX_HASH_VALUE 61 -/* maximum key range = 60, duplicates = 0 */ +#define MAX_HASH_VALUE 71 +/* maximum key range = 70, duplicates = 0 */ #ifdef __GNUC__ __inline @@ -52,32 +52,32 @@ hash (register const char *str, register size_t len) { static const unsigned char asso_values[] = { - 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, - 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, - 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, - 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, - 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, - 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, - 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, - 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, - 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, - 62, 62, 62, 62, 62, 62, 62, 0, 15, 0, - 0, 0, 10, 62, 10, 0, 62, 62, 10, 5, - 0, 20, 62, 62, 30, 0, 20, 40, 15, 15, - 25, 35, 62, 62, 62, 62, 62, 62, 62, 62, - 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, - 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, - 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, - 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, - 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, - 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, - 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, - 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, - 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, - 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, - 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, - 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, - 62, 62, 62, 62, 62, 62 + 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 10, 35, 20, + 25, 25, 20, 72, 0, 0, 72, 72, 40, 20, + 0, 15, 30, 72, 15, 5, 0, 35, 0, 10, + 0, 45, 72, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72 }; return len + asso_values[(unsigned char)str[1]] + asso_values[(unsigned char)str[0]]; } @@ -88,132 +88,184 @@ is_keyword_js (register const char *str, register size_t len) struct stringpool_t { char stringpool_str2[sizeof("in")]; - char stringpool_str3[sizeof("new")]; - char stringpool_str4[sizeof("case")]; - char stringpool_str5[sizeof("catch")]; - char stringpool_str6[sizeof("delete")]; - char stringpool_str7[sizeof("default")]; - char stringpool_str8[sizeof("debugger")]; + char stringpool_str4[sizeof("this")]; + char stringpool_str5[sizeof("throw")]; + char stringpool_str6[sizeof("throws")]; + char stringpool_str9[sizeof("interface")]; char stringpool_str10[sizeof("instanceof")]; - char stringpool_str11[sizeof("import")]; - char stringpool_str12[sizeof("if")]; - char stringpool_str13[sizeof("let")]; - char stringpool_str14[sizeof("else")]; - char stringpool_str15[sizeof("class")]; - char stringpool_str17[sizeof("finally")]; - char stringpool_str18[sizeof("var")]; - char stringpool_str19[sizeof("with")]; - char stringpool_str20[sizeof("await")]; + char stringpool_str11[sizeof("static")]; + char stringpool_str13[sizeof("var")]; + char stringpool_str14[sizeof("with")]; + char stringpool_str15[sizeof("while")]; + char stringpool_str16[sizeof("native")]; + char stringpool_str17[sizeof("as")]; + char stringpool_str18[sizeof("try")]; + char stringpool_str19[sizeof("void")]; + char stringpool_str20[sizeof("async")]; char stringpool_str21[sizeof("switch")]; - char stringpool_str22[sizeof("do")]; - char stringpool_str25[sizeof("const")]; - char stringpool_str26[sizeof("static")]; - char stringpool_str28[sizeof("continue")]; - char stringpool_str30[sizeof("while")]; + char stringpool_str22[sizeof("if")]; + char stringpool_str23[sizeof("volatile")]; + char stringpool_str24[sizeof("transient")]; + char stringpool_str25[sizeof("await")]; + char stringpool_str26[sizeof("import")]; + char stringpool_str27[sizeof("finally")]; + char stringpool_str28[sizeof("new")]; + char stringpool_str29[sizeof("enum")]; + char stringpool_str30[sizeof("implements")]; char stringpool_str31[sizeof("export")]; char stringpool_str32[sizeof("extends")]; - char stringpool_str33[sizeof("for")]; - char stringpool_str34[sizeof("this")]; - char stringpool_str35[sizeof("throw")]; - char stringpool_str36[sizeof("return")]; - char stringpool_str39[sizeof("void")]; - char stringpool_str40[sizeof("yield")]; + char stringpool_str34[sizeof("case")]; + char stringpool_str35[sizeof("catch")]; + char stringpool_str37[sizeof("of")]; + char stringpool_str38[sizeof("for")]; + char stringpool_str39[sizeof("from")]; + char stringpool_str40[sizeof("const")]; + char stringpool_str42[sizeof("do")]; + char stringpool_str43[sizeof("continue")]; char stringpool_str45[sizeof("super")]; - char stringpool_str50[sizeof("break")]; - char stringpool_str53[sizeof("try")]; - char stringpool_str58[sizeof("function")]; - char stringpool_str61[sizeof("typeof")]; + char stringpool_str46[sizeof("return")]; + char stringpool_str47[sizeof("package")]; + char stringpool_str50[sizeof("yield")]; + char stringpool_str51[sizeof("typeof")]; + char stringpool_str52[sizeof("private")]; + char stringpool_str53[sizeof("abstract")]; + char stringpool_str54[sizeof("protected")]; + char stringpool_str55[sizeof("break")]; + char stringpool_str56[sizeof("delete")]; + char stringpool_str57[sizeof("default")]; + char stringpool_str58[sizeof("debugger")]; + char stringpool_str62[sizeof("synchronized")]; + char stringpool_str63[sizeof("function")]; + char stringpool_str65[sizeof("class")]; + char stringpool_str68[sizeof("let")]; + char stringpool_str69[sizeof("else")]; + char stringpool_str71[sizeof("public")]; }; static const struct stringpool_t stringpool_contents = { "in", - "new", - "case", - "catch", - "delete", - "default", - "debugger", + "this", + "throw", + "throws", + "interface", "instanceof", - "import", - "if", - "let", - "else", - "class", - "finally", + "static", "var", "with", - "await", - "switch", - "do", - "const", - "static", - "continue", "while", + "native", + "as", + "try", + "void", + "async", + "switch", + "if", + "volatile", + "transient", + "await", + "import", + "finally", + "new", + "enum", + "implements", "export", "extends", + "case", + "catch", + "of", "for", - "this", - "throw", + "from", + "const", + "do", + "continue", + "super", "return", - "void", + "package", "yield", - "super", + "typeof", + "private", + "abstract", + "protected", "break", - "try", + "delete", + "default", + "debugger", + "synchronized", "function", - "typeof" + "class", + "let", + "else", + "public" }; #define stringpool ((const char *) &stringpool_contents) static const int wordlist[] = { -1, -1, (int)(size_t)&((struct stringpool_t *)0)->stringpool_str2, - (int)(size_t)&((struct stringpool_t *)0)->stringpool_str3, + -1, (int)(size_t)&((struct stringpool_t *)0)->stringpool_str4, (int)(size_t)&((struct stringpool_t *)0)->stringpool_str5, (int)(size_t)&((struct stringpool_t *)0)->stringpool_str6, - (int)(size_t)&((struct stringpool_t *)0)->stringpool_str7, - (int)(size_t)&((struct stringpool_t *)0)->stringpool_str8, - -1, + -1, -1, + (int)(size_t)&((struct stringpool_t *)0)->stringpool_str9, (int)(size_t)&((struct stringpool_t *)0)->stringpool_str10, (int)(size_t)&((struct stringpool_t *)0)->stringpool_str11, - (int)(size_t)&((struct stringpool_t *)0)->stringpool_str12, + -1, (int)(size_t)&((struct stringpool_t *)0)->stringpool_str13, (int)(size_t)&((struct stringpool_t *)0)->stringpool_str14, (int)(size_t)&((struct stringpool_t *)0)->stringpool_str15, - -1, + (int)(size_t)&((struct stringpool_t *)0)->stringpool_str16, (int)(size_t)&((struct stringpool_t *)0)->stringpool_str17, (int)(size_t)&((struct stringpool_t *)0)->stringpool_str18, (int)(size_t)&((struct stringpool_t *)0)->stringpool_str19, (int)(size_t)&((struct stringpool_t *)0)->stringpool_str20, (int)(size_t)&((struct stringpool_t *)0)->stringpool_str21, (int)(size_t)&((struct stringpool_t *)0)->stringpool_str22, - -1, -1, + (int)(size_t)&((struct stringpool_t *)0)->stringpool_str23, + (int)(size_t)&((struct stringpool_t *)0)->stringpool_str24, (int)(size_t)&((struct stringpool_t *)0)->stringpool_str25, (int)(size_t)&((struct stringpool_t *)0)->stringpool_str26, - -1, + (int)(size_t)&((struct stringpool_t *)0)->stringpool_str27, (int)(size_t)&((struct stringpool_t *)0)->stringpool_str28, - -1, + (int)(size_t)&((struct stringpool_t *)0)->stringpool_str29, (int)(size_t)&((struct stringpool_t *)0)->stringpool_str30, (int)(size_t)&((struct stringpool_t *)0)->stringpool_str31, (int)(size_t)&((struct stringpool_t *)0)->stringpool_str32, - (int)(size_t)&((struct stringpool_t *)0)->stringpool_str33, + -1, (int)(size_t)&((struct stringpool_t *)0)->stringpool_str34, (int)(size_t)&((struct stringpool_t *)0)->stringpool_str35, - (int)(size_t)&((struct stringpool_t *)0)->stringpool_str36, - -1, -1, + -1, + (int)(size_t)&((struct stringpool_t *)0)->stringpool_str37, + (int)(size_t)&((struct stringpool_t *)0)->stringpool_str38, (int)(size_t)&((struct stringpool_t *)0)->stringpool_str39, (int)(size_t)&((struct stringpool_t *)0)->stringpool_str40, - -1, -1, -1, -1, + -1, + (int)(size_t)&((struct stringpool_t *)0)->stringpool_str42, + (int)(size_t)&((struct stringpool_t *)0)->stringpool_str43, + -1, (int)(size_t)&((struct stringpool_t *)0)->stringpool_str45, - -1, -1, -1, -1, - (int)(size_t)&((struct stringpool_t *)0)->stringpool_str50, + (int)(size_t)&((struct stringpool_t *)0)->stringpool_str46, + (int)(size_t)&((struct stringpool_t *)0)->stringpool_str47, -1, -1, + (int)(size_t)&((struct stringpool_t *)0)->stringpool_str50, + (int)(size_t)&((struct stringpool_t *)0)->stringpool_str51, + (int)(size_t)&((struct stringpool_t *)0)->stringpool_str52, (int)(size_t)&((struct stringpool_t *)0)->stringpool_str53, - -1, -1, -1, -1, + (int)(size_t)&((struct stringpool_t *)0)->stringpool_str54, + (int)(size_t)&((struct stringpool_t *)0)->stringpool_str55, + (int)(size_t)&((struct stringpool_t *)0)->stringpool_str56, + (int)(size_t)&((struct stringpool_t *)0)->stringpool_str57, (int)(size_t)&((struct stringpool_t *)0)->stringpool_str58, + -1, -1, -1, + (int)(size_t)&((struct stringpool_t *)0)->stringpool_str62, + (int)(size_t)&((struct stringpool_t *)0)->stringpool_str63, + -1, + (int)(size_t)&((struct stringpool_t *)0)->stringpool_str65, -1, -1, - (int)(size_t)&((struct stringpool_t *)0)->stringpool_str61 + (int)(size_t)&((struct stringpool_t *)0)->stringpool_str68, + (int)(size_t)&((struct stringpool_t *)0)->stringpool_str69, + -1, + (int)(size_t)&((struct stringpool_t *)0)->stringpool_str71 }; if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH) diff --git a/llamafile/is_keyword_js.gperf b/llamafile/is_keyword_js.gperf index c59e6580c8..082ee25153 100644 --- a/llamafile/is_keyword_js.gperf +++ b/llamafile/is_keyword_js.gperf @@ -7,6 +7,9 @@ %readonly-tables %define lookup-function-name is_keyword_js %% +abstract +as +async await break case @@ -19,27 +22,41 @@ default delete do else +enum export extends finally for +from function if +implements import in instanceof +interface let +native new +of +package +private +protected +public return static super switch +synchronized this throw +throws +transient try typeof var void +volatile while with yield diff --git a/llamafile/is_keyword_js_constant.c b/llamafile/is_keyword_js_constant.c index 2e65fb961d..886f0f16e4 100644 --- a/llamafile/is_keyword_js_constant.c +++ b/llamafile/is_keyword_js_constant.c @@ -33,12 +33,12 @@ #include -#define TOTAL_KEYWORDS 7 +#define TOTAL_KEYWORDS 8 #define MIN_WORD_LENGTH 3 #define MAX_WORD_LENGTH 10 #define MIN_HASH_VALUE 3 -#define MAX_HASH_VALUE 10 -/* maximum key range = 8, duplicates = 0 */ +#define MAX_HASH_VALUE 19 +/* maximum key range = 17, duplicates = 0 */ #ifdef __GNUC__ __inline @@ -52,32 +52,32 @@ hash (register const char *str, register size_t len) { static const unsigned char asso_values[] = { - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 0, 11, 11, 11, 11, 0, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 0, 0, 11, 11, 11, 11, 11, 11, - 2, 11, 11, 11, 11, 11, 0, 0, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11 + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 0, 20, 20, 20, 20, 0, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 10, 20, 20, + 20, 20, 0, 0, 20, 20, 20, 20, 20, 20, + 10, 20, 20, 20, 20, 20, 0, 0, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20 }; return len + asso_values[(unsigned char)str[0]]; } @@ -90,20 +90,22 @@ is_keyword_js_constant (register const char *str, register size_t len) char stringpool_str3[sizeof("NaN")]; char stringpool_str4[sizeof("true")]; char stringpool_str5[sizeof("false")]; - char stringpool_str6[sizeof("null")]; char stringpool_str8[sizeof("Infinity")]; char stringpool_str9[sizeof("undefined")]; char stringpool_str10[sizeof("globalThis")]; + char stringpool_str14[sizeof("null")]; + char stringpool_str19[sizeof("arguments")]; }; static const struct stringpool_t stringpool_contents = { "NaN", "true", "false", - "null", "Infinity", "undefined", - "globalThis" + "globalThis", + "null", + "arguments" }; #define stringpool ((const char *) &stringpool_contents) static const int wordlist[] = @@ -112,11 +114,14 @@ is_keyword_js_constant (register const char *str, register size_t len) (int)(size_t)&((struct stringpool_t *)0)->stringpool_str3, (int)(size_t)&((struct stringpool_t *)0)->stringpool_str4, (int)(size_t)&((struct stringpool_t *)0)->stringpool_str5, - (int)(size_t)&((struct stringpool_t *)0)->stringpool_str6, - -1, + -1, -1, (int)(size_t)&((struct stringpool_t *)0)->stringpool_str8, (int)(size_t)&((struct stringpool_t *)0)->stringpool_str9, - (int)(size_t)&((struct stringpool_t *)0)->stringpool_str10 + (int)(size_t)&((struct stringpool_t *)0)->stringpool_str10, + -1, -1, -1, + (int)(size_t)&((struct stringpool_t *)0)->stringpool_str14, + -1, -1, -1, -1, + (int)(size_t)&((struct stringpool_t *)0)->stringpool_str19 }; if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH) diff --git a/llamafile/is_keyword_js_constant.gperf b/llamafile/is_keyword_js_constant.gperf index 26e8a0d302..593ae796b9 100644 --- a/llamafile/is_keyword_js_constant.gperf +++ b/llamafile/is_keyword_js_constant.gperf @@ -9,6 +9,7 @@ %% Infinity NaN +arguments false globalThis null