diff --git a/html/parse.go b/html/parse.go deleted file mode 100644 index b7e1ba3..0000000 --- a/html/parse.go +++ /dev/null @@ -1,403 +0,0 @@ -package html - -import ( - "bytes" - "fmt" - "io" - "strings" - - "github.com/tdewolff/parse/v2" - "github.com/tdewolff/parse/v2/css" -) - -type AST struct { - Children []*Tag - Text []byte -} - -func (ast *AST) String() string { - sb := strings.Builder{} - for i, child := range ast.Children { - if i != 0 { - sb.WriteString("\n") - } - sb.WriteString(child.ASTString()) - } - return sb.String() -} - -type Attr struct { - Key, Val []byte -} - -func (attr *Attr) String() string { - return fmt.Sprintf(`%s="%s"`, string(attr.Key), string(attr.Val)) -} - -type Tag struct { - Root *AST - Parent *Tag - Prev, Next *Tag - Children []*Tag - Index int - - Name []byte - Attrs []Attr - textStart, textEnd int -} - -func (tag *Tag) getAttr(key []byte) ([]byte, bool) { - for _, attr := range tag.Attrs { - if bytes.Equal(key, attr.Key) { - return attr.Val, true - } - } - return nil, false -} - -func (tag *Tag) GetAttr(key string) (string, bool) { - val, ok := tag.getAttr([]byte(key)) - return string(val), ok -} - -func (tag *Tag) Text() string { - return string(tag.Root.Text[tag.textStart:tag.textEnd]) -} - -func (tag *Tag) String() string { - sb := strings.Builder{} - sb.WriteString("<") - sb.Write(tag.Name) - for _, attr := range tag.Attrs { - sb.WriteString(" ") - sb.WriteString(attr.String()) - } - sb.WriteString(">") - return sb.String() -} - -func (tag *Tag) ASTString() string { - sb := strings.Builder{} - sb.WriteString(tag.String()) - for _, child := range tag.Children { - sb.WriteString("\n ") - s := child.ASTString() - s = strings.ReplaceAll(s, "\n", "\n ") - sb.WriteString(s) - } - return sb.String() -} - -func Parse(r *parse.Input) (*AST, error) { - ast := &AST{} - root := &Tag{} - cur := root - - l := NewLexer(r) - for { - tt, data := l.Next() - switch tt { - case ErrorToken: - if err := l.Err(); err != io.EOF { - return nil, err - } - ast.Children = root.Children - return ast, nil - case TextToken: - ast.Text = append(ast.Text, data...) - case StartTagToken: - child := &Tag{ - Root: ast, - Parent: cur, - Index: len(cur.Children), - Name: l.Text(), - textStart: len(ast.Text), - } - if 0 < len(cur.Children) { - child.Prev = cur.Children[len(cur.Children)-1] - child.Prev.Next = child - } - cur.Children = append(cur.Children, child) - cur = child - case AttributeToken: - val := l.AttrVal() - if 0 < len(val) && (val[0] == '"' || val[0] == '\'') { - val = val[1 : len(val)-1] - } - cur.Attrs = append(cur.Attrs, Attr{l.AttrKey(), val}) - case StartTagCloseToken: - if voidTags[string(cur.Name)] { - cur.textEnd = len(ast.Text) - cur = cur.Parent - } - case EndTagToken, StartTagVoidToken: - start := cur - for start != root && !bytes.Equal(l.Text(), start.Name) { - start = start.Parent - } - if start == root { - // ignore - } else { - parent := start.Parent - for cur != parent { - cur.textEnd = len(ast.Text) - cur = cur.Parent - } - } - } - } -} - -func (ast *AST) Query(s string) (*Tag, error) { - sel, err := ParseSelector(s) - if err != nil { - return nil, err - } - - for _, child := range ast.Children { - if match := child.query(sel); match != nil { - return match, nil - } - } - return nil, nil -} - -func (tag *Tag) query(sel selector) *Tag { - if sel.AppliesTo(tag) { - return tag - } - for _, child := range tag.Children { - if match := child.query(sel); match != nil { - return match - } - } - return nil -} - -func (ast *AST) QueryAll(s string) ([]*Tag, error) { - sel, err := ParseSelector(s) - if err != nil { - return nil, err - } - - matches := []*Tag{} - for _, child := range ast.Children { - child.queryAll(&matches, sel) - } - return matches, nil -} - -func (tag *Tag) queryAll(matches *[]*Tag, sel selector) { - if sel.AppliesTo(tag) { - *matches = append(*matches, tag) - } - for _, child := range tag.Children { - child.queryAll(matches, sel) - } -} - -type attrSelector struct { - op byte // empty, =, ~, | - attr []byte - val []byte -} - -func (sel attrSelector) AppliesTo(tag *Tag) bool { - val, ok := tag.getAttr(sel.attr) - if !ok { - return false - } - - switch sel.op { - case 0: - return true - case '=': - return bytes.Equal(val, sel.val) - case '~': - if 0 < len(sel.val) { - vals := bytes.Split(val, []byte(" ")) - for _, val := range vals { - if bytes.Equal(val, sel.val) { - return true - } - } - } - case '|': - return bytes.Equal(val, sel.val) || bytes.HasPrefix(val, append(sel.val, '-')) - } - return false -} - -func (attr attrSelector) String() string { - sb := strings.Builder{} - sb.Write(attr.attr) - if attr.op != 0 { - sb.WriteByte(attr.op) - if attr.op != '=' { - sb.WriteByte('=') - } - sb.WriteByte('"') - sb.Write(attr.val) - sb.WriteByte('"') - } - return sb.String() -} - -type selectorNode struct { - typ []byte // is * for universal - attrs []attrSelector - op byte // space or >, last is NULL -} - -func (sel selectorNode) AppliesTo(tag *Tag) bool { - if 0 < len(sel.typ) && !bytes.Equal(sel.typ, []byte("*")) && !bytes.Equal(sel.typ, tag.Name) { - return false - } - for _, attr := range sel.attrs { - if !attr.AppliesTo(tag) { - return false - } - } - return true -} - -func (sel selectorNode) String() string { - sb := strings.Builder{} - sb.Write(sel.typ) - for _, attr := range sel.attrs { - if bytes.Equal(attr.attr, []byte("id")) && attr.op == '=' { - sb.WriteByte('#') - sb.Write(attr.val) - } else if bytes.Equal(attr.attr, []byte("class")) && attr.op == '~' { - sb.WriteByte('.') - sb.Write(attr.val) - } else { - sb.WriteByte('[') - sb.WriteString(attr.String()) - sb.WriteByte(']') - } - } - if sel.op != 0 { - sb.WriteByte(' ') - sb.WriteByte(sel.op) - sb.WriteByte(' ') - } - return sb.String() -} - -type token struct { - tt css.TokenType - data []byte -} - -type selector []selectorNode - -func ParseSelector(s string) (selector, error) { - ts := []token{} - l := css.NewLexer(parse.NewInputString(s)) - for { - tt, data := l.Next() - if tt == css.ErrorToken { - if err := l.Err(); err != io.EOF { - return selector{}, err - } - break - } - ts = append(ts, token{ - tt: tt, - data: data, - }) - } - - sel := selector{} - node := selectorNode{} - for i := 0; i < len(ts); i++ { - t := ts[i] - if 0 < i && (t.tt == css.WhitespaceToken || t.tt == css.DelimToken && t.data[0] == '>') { - if t.tt == css.DelimToken { - node.op = '>' - } else { - node.op = ' ' - } - sel = append(sel, node) - node = selectorNode{} - } else if t.tt == css.IdentToken || t.tt == css.DelimToken && t.data[0] == '*' { - node.typ = t.data - } else if t.tt == css.DelimToken && (t.data[0] == '.' || t.data[0] == '#') && i+1 < len(ts) && ts[i+1].tt == css.IdentToken { - if t.data[0] == '#' { - node.attrs = append(node.attrs, attrSelector{op: '=', attr: []byte("id"), val: ts[i+1].data}) - } else { - node.attrs = append(node.attrs, attrSelector{op: '~', attr: []byte("class"), val: ts[i+1].data}) - } - i++ - } else if t.tt == css.DelimToken && t.data[0] == '[' && i+2 < len(ts) && ts[i+1].tt == css.IdentToken && ts[i+2].tt == css.DelimToken { - if ts[i+2].data[0] == ']' { - node.attrs = append(node.attrs, attrSelector{op: 0, attr: ts[i+1].data}) - i += 2 - } else if i+4 < len(ts) && ts[i+3].tt == css.IdentToken && ts[i+4].tt == css.DelimToken && ts[i+4].data[0] == ']' { - node.attrs = append(node.attrs, attrSelector{op: ts[i+2].data[0], attr: ts[i+1].data, val: ts[i+3].data}) - i += 4 - } - } - } - sel = append(sel, node) - return sel, nil -} - -func (sels selector) AppliesTo(tag *Tag) bool { - if len(sels) == 0 { - return true - } else if !sels[len(sels)-1].AppliesTo(tag) { - return false - } - - tag = tag.Parent - isel := len(sels) - 2 - for 0 <= isel && tag != nil { - switch sels[isel].op { - case ' ': - for tag != nil { - if sels[isel].AppliesTo(tag) { - break - } - tag = tag.Parent - } - case '>': - if !sels[isel].AppliesTo(tag) { - return false - } - tag = tag.Parent - default: - return false - } - isel-- - } - return len(sels) != 0 && isel == -1 -} - -func (sels selector) String() string { - if len(sels) == 0 { - return "" - } - sb := strings.Builder{} - for _, sel := range sels { - sb.WriteString(sel.String()) - } - return sb.String()[1:] -} - -var voidTags = map[string]bool{ - "area": true, - "base": true, - "br": true, - "col": true, - "embed": true, - "hr": true, - "img": true, - "input": true, - "link": true, - "meta": true, - "source": true, - "track": true, - "wbr": true, -} diff --git a/js/ast.go b/js/ast.go index 47144a3..b2fa3f7 100644 --- a/js/ast.go +++ b/js/ast.go @@ -504,7 +504,18 @@ func (n ExprStmt) String() string { // JS writes JavaScript to writer. func (n ExprStmt) JS(w io.Writer) { - n.Value.JS(w) + buf := &bytes.Buffer{} + n.Value.JS(buf) + expr := buf.Bytes() + + group := bytes.HasPrefix(expr, []byte("let ")) + if group { + w.Write([]byte("(")) + } + w.Write(expr) + if group { + w.Write([]byte(")")) + } w.Write([]byte(";")) } diff --git a/js/ast_test.go b/js/ast_test.go index 5a1f59a..6468f59 100644 --- a/js/ast_test.go +++ b/js/ast_test.go @@ -125,6 +125,7 @@ func TestJS(t *testing.T) { {"do//!\n; while(1)", "//! do; while (1);"}, // space after //! is newline {"//!\nn=>{ return n }", "//! (n) => { return n; };"}, // space after //! is newline {"//!\n{//!\n}", "//! { //! }"}, // space after //! is newline + {`for(;;)let = 5`, `for ( ; ; ) { (let = 5); }`}, } re := regexp.MustCompile("\n *") diff --git a/js/parse.go b/js/parse.go index aab4002..3a1bd3d 100644 --- a/js/parse.go +++ b/js/parse.go @@ -259,6 +259,9 @@ func (p *Parser) parseStmt(allowDeclaration bool) (stmt IStmt) { p.fail("let declaration") return } + } else if p.tt == OpenBracketToken { + p.failMessage("unexpected let [ in single-statement context") + return } else { // expression stmt = &ExprStmt{p.parseIdentifierExpression(OpExpr, let)} diff --git a/js/parse_test.go b/js/parse_test.go index 1835f13..67c491d 100644 --- a/js/parse_test.go +++ b/js/parse_test.go @@ -724,7 +724,8 @@ func TestParseError(t *testing.T) { {"\u2010", "unexpected \u2010"}, {"a=\u2010", "unexpected \u2010 in expression"}, {"/", "unexpected EOF or newline in regular expression"}, - {"({...[]})=>a", "unexpected => in expression"}, // go-fuzz + {"({...[]})=>a", "unexpected => in expression"}, // go-fuzz + {`for(;;)let[a]`, `unexpected let [ in single-statement context`}, // go-fuzz } for _, tt := range tests { t.Run(tt.js, func(t *testing.T) {