From ab3f3319ba5a4f5b2e69c38fd4e6cdca60b51f2f Mon Sep 17 00:00:00 2001 From: Tim Radvan Date: Sun, 16 Sep 2018 17:21:09 +0100 Subject: [PATCH 1/7] Allow type to be a function --- moo.js | 29 ++++++++++++++------------- test/test.js | 56 +++++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 70 insertions(+), 15 deletions(-) diff --git a/moo.js b/moo.js index 5599aec..580280c 100644 --- a/moo.js +++ b/moo.js @@ -111,7 +111,7 @@ // nb. error and fallback imply lineBreaks var options = { - tokenType: name, + defaultType: name, lineBreaks: !!obj.error || !!obj.fallback, pop: false, next: null, @@ -119,7 +119,7 @@ error: false, fallback: false, value: null, - getType: null, + type: null, shouldThrow: false, } @@ -138,7 +138,7 @@ : isRegExp(b) ? -1 : isRegExp(a) ? +1 : b.length - a.length }) if (options.keywords) { - options.getType = keywordTransform(options.keywords) + options.type = keywordTransform(options.keywords) } return options } @@ -166,9 +166,9 @@ // errorRule can only be set once if (errorRule) { if (!options.fallback === !errorRule.fallback) { - throw new Error("Multiple " + (options.fallback ? "fallback" : "error") + " rules not allowed (for token '" + options.tokenType + "')") + throw new Error("Multiple " + (options.fallback ? 
"fallback" : "error") + " rules not allowed (for token '" + options.defaultType + "')") } else { - throw new Error("fallback and error are mutually exclusive (for token '" + options.tokenType + "')") + throw new Error("fallback and error are mutually exclusive (for token '" + options.defaultType + "')") } } errorRule = options @@ -185,10 +185,10 @@ // Warn about inappropriate state-switching options if (options.pop || options.push || options.next) { if (!hasStates) { - throw new Error("State-switching options are not allowed in stateless lexers (for token '" + options.tokenType + "')") + throw new Error("State-switching options are not allowed in stateless lexers (for token '" + options.defaultType + "')") } if (options.fallback) { - throw new Error("State-switching options are not allowed on fallback tokens (for token '" + options.tokenType + "')") + throw new Error("State-switching options are not allowed on fallback tokens (for token '" + options.defaultType + "')") } } @@ -244,10 +244,10 @@ function checkStateGroup(g, name, map) { var state = g && (g.push || g.next) if (state && !map[state]) { - throw new Error("Missing state '" + state + "' (in token '" + g.tokenType + "' of state '" + name + "')") + throw new Error("Missing state '" + state + "' (in token '" + g.defaultType + "' of state '" + name + "')") } if (g && g.pop && +g.pop !== 1) { - throw new Error("pop must be 1 (in token '" + g.tokenType + "' of state '" + name + "')") + throw new Error("pop must be 1 (in token '" + g.defaultType + "' of state '" + name + "')") } } function compileStates(states, start) { @@ -342,7 +342,7 @@ source += '}\n' } source += '}\n' - return Function('value', source) // getType + return Function('value', source) // type } /***************************************************************************/ @@ -500,8 +500,8 @@ } var token = { - type: (group.getType && group.getType(text)) || group.tokenType, - value: group.value ? 
group.value(text) : text, + type: (typeof group.type === 'function' && group.type(text)) || group.defaultType, + value: typeof group.value === 'function' ? group.value(text) : text, text: text, toString: tokenToString, offset: offset, @@ -560,11 +560,11 @@ Lexer.prototype.has = function(tokenType) { for (var s in this.states) { var state = this.states[s] - if (state.error && state.error.tokenType === tokenType) return true + if (state.error && state.error.defaultType === tokenType) return true var groups = state.groups for (var i = 0; i < groups.length; i++) { var group = groups[i] - if (group.tokenType === tokenType) return true + if (group.defaultType === tokenType) return true if (group.keywords && hasOwnProperty.call(group.keywords, tokenType)) { return true } @@ -579,6 +579,7 @@ states: compileStates, error: Object.freeze({error: true}), fallback: Object.freeze({fallback: true}), + keywords: keywordTransform, } })); diff --git a/test/test.js b/test/test.js index fc6b950..1ac03c5 100644 --- a/test/test.js +++ b/test/test.js @@ -344,6 +344,61 @@ describe('keywords', () => { }) +describe('type transforms', () => { + + test('can use moo.keywords as type', () => { + let lexer = compile({ + identifier: { + match: /[a-zA-Z]+/, + type: moo.keywords({ + 'kw-class': 'class', + 'kw-def': 'def', + 'kw-if': 'if', + }), + }, + space: {match: /\s+/, lineBreaks: true}, + }) + lexer.reset('foo def') + expect(Array.from(lexer).map(t => t.type)).toEqual([ + 'identifier', + 'space', + 'kw-def', + ]) + }) + + test('type can be a function', () => { + let lexer = compile({ + identifier: { + match: /[a-zA-Z]+/, + type: () => 'moo', + }, + }) + lexer.reset('baa') + expect(lexer.next()).toMatchObject({ type: 'moo' }) + }) + + test('supports case-insensitive keywords', () => { + const caseInsensitiveKeywords = map => { + const transform = moo.keywords(map) + return text => transform(text.toLowerCase()) + } + let lexer = compile({ + space: ' ', + identifier: { + match: /[a-zA-Z]+/, + 
type: caseInsensitiveKeywords({ + keyword: ['moo'], + }), + }, + }) + lexer.reset('mOo') + expect(lexer.next()).toMatchObject({ type: 'keyword', value: 'mOo' }) + lexer.reset('cheese') + expect(lexer.next()).toMatchObject({ type: 'identifier', value: 'cheese'}) + }) + +}) + describe('value transforms', () => { test('forbid capture groups', () => { @@ -857,7 +912,6 @@ describe('errors', () => { digits: /[0-9]+/, error: moo.error, }) - expect(lexer.error).toMatchObject({tokenType: 'error'}) lexer.reset('123foo') expect(lexer.next()).toMatchObject({type: 'digits', value: '123'}) expect(lexer.next()).toMatchObject({type: 'error', value: 'foo', offset: 3}) From 9aa0c81c385aec67a260d626ce347cb63f398e98 Mon Sep 17 00:00:00 2001 From: Tim Radvan Date: Sun, 16 Sep 2018 17:28:18 +0100 Subject: [PATCH 2/7] Warn if both keywords and type are set --- moo.js | 12 +++++++++--- test/test.js | 9 +++++++++ 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/moo.js b/moo.js index 580280c..66fc973 100644 --- a/moo.js +++ b/moo.js @@ -130,6 +130,15 @@ } } + // `keywords: obj` is shorthand for `type: moo.keywords(obj)` + if (options.keywords) { + // Warn if both keywords and type are set + if (options.type) { + throw new Error("Cannot have both keywords and type (for token '" + options.defaultType + "')") + } + options.type = keywordTransform(options.keywords) + } + // convert to array var match = options.match options.match = Array.isArray(match) ? match : match ? [match] : [] @@ -137,9 +146,6 @@ return isRegExp(a) && isRegExp(b) ? 0 : isRegExp(b) ? -1 : isRegExp(a) ? 
+1 : b.length - a.length }) - if (options.keywords) { - options.type = keywordTransform(options.keywords) - } return options } diff --git a/test/test.js b/test/test.js index 1ac03c5..338833f 100644 --- a/test/test.js +++ b/test/test.js @@ -397,6 +397,15 @@ describe('type transforms', () => { expect(lexer.next()).toMatchObject({ type: 'identifier', value: 'cheese'}) }) + test('cannot set both type and keywords', () => { + expect(() => compile({ + identifier: { + type: () => 'moo', + keywords: {foo: 'keyword'}, + }, + })).toThrow("Cannot have both keywords and type (for token 'identifier')") + }) + }) describe('value transforms', () => { From 90c598001e3bd6771031d307a270faf431c70a8d Mon Sep 17 00:00:00 2001 From: Tim Radvan Date: Sun, 16 Sep 2018 17:34:30 +0100 Subject: [PATCH 3/7] Warn if type is a string --- moo.js | 7 ++++++- test/test.js | 8 ++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/moo.js b/moo.js index 66fc973..d25c611 100644 --- a/moo.js +++ b/moo.js @@ -134,11 +134,16 @@ if (options.keywords) { // Warn if both keywords and type are set if (options.type) { - throw new Error("Cannot have both keywords and type (for token '" + options.defaultType + "')") + throw new Error("Cannot have both keywords and type (for token '" + name + "')") } options.type = keywordTransform(options.keywords) } + // type transform cannot be a string + if (typeof options.type === 'string') { + throw new Error("Type transform cannot be a string (type '" + options.type + "' for token '" + name + "')") + } + // convert to array var match = options.match options.match = Array.isArray(match) ? match : match ? 
[match] : [] diff --git a/test/test.js b/test/test.js index 338833f..22bd675 100644 --- a/test/test.js +++ b/test/test.js @@ -406,6 +406,14 @@ describe('type transforms', () => { })).toThrow("Cannot have both keywords and type (for token 'identifier')") }) + test('cannot set type to a string', () => { + expect(() => compile({ + identifier: { + type: 'moo', + }, + })).toThrow("Type transform cannot be a string (type 'moo' for token 'identifier')") + }) + }) describe('value transforms', () => { From 7687bca23e7f229e3d6fe912de9116c6efea555d Mon Sep 17 00:00:00 2001 From: Tim Radvan Date: Sun, 16 Sep 2018 17:37:44 +0100 Subject: [PATCH 4/7] Rename name -> type --- moo.js | 16 ++++++++-------- test/test.js | 8 ++++---- test/tosh.js | 52 ++++++++++++++++++++++++++-------------------------- 3 files changed, 38 insertions(+), 38 deletions(-) diff --git a/moo.js b/moo.js index d25c611..74bc2da 100644 --- a/moo.js +++ b/moo.js @@ -93,15 +93,15 @@ } continue } - if (!obj.name) { - throw new Error('Rule has no name: ' + JSON.stringify(obj)) + if (!obj.type) { + throw new Error('Rule has no type: ' + JSON.stringify(obj)) } - result.push(ruleOptions(obj.name, obj)) + result.push(ruleOptions(obj.type, obj)) } return result } - function ruleOptions(name, obj) { + function ruleOptions(type, obj) { if (!isObject(obj)) { obj = { match: obj } } @@ -111,7 +111,7 @@ // nb. 
error and fallback imply lineBreaks var options = { - defaultType: name, + defaultType: type, lineBreaks: !!obj.error || !!obj.fallback, pop: false, next: null, @@ -134,14 +134,14 @@ if (options.keywords) { // Warn if both keywords and type are set if (options.type) { - throw new Error("Cannot have both keywords and type (for token '" + name + "')") + throw new Error("Cannot have both keywords and type (for token '" + type + "')") } options.type = keywordTransform(options.keywords) } // type transform cannot be a string - if (typeof options.type === 'string') { - throw new Error("Type transform cannot be a string (type '" + options.type + "' for token '" + name + "')") + if (typeof options.type === 'string' && type !== options.type) { + throw new Error("Type transform cannot be a string (type '" + options.type + "' for token '" + type + "')") } // convert to array diff --git a/test/test.js b/test/test.js index 22bd675..3938c62 100644 --- a/test/test.js +++ b/test/test.js @@ -110,10 +110,10 @@ describe('compiler', () => { test('accepts rules in an array', () => { const lexer = compile([ - { name: 'keyword', match: 'Bob'}, - { name: 'word', match: /[a-z]+/}, - { name: 'number', match: /[0-9]+/}, - { name: 'space', match: / +/}, + { type: 'keyword', match: 'Bob'}, + { type: 'word', match: /[a-z]+/}, + { type: 'number', match: /[0-9]+/}, + { type: 'space', match: / +/}, ]) lexer.reset('Bob ducks are 123 bad') expect(lexer.next()).toMatchObject({type: 'keyword', value: 'Bob'}) diff --git a/test/tosh.js b/test/tosh.js index 6a22ec5..6020174 100644 --- a/test/tosh.js +++ b/test/tosh.js @@ -2,32 +2,32 @@ const moo = require('../moo') let toshLexer = moo.compile([ - {name: 'symbol', match: Array.from('-%#+*/=^,?')}, // single character - {name: 'WS', match: /[ \t]+/}, - {name: 'ellips', match: /\.{3}/}, - {name: 'comment', match: /\/{2}.*$/}, - {name: 'false', match: /\<\>/}, - {name: 'zero', match: /\(\)/}, - {name: 'empty', match: /_(?: |$)/}, - {name: 'number', match: 
/[0-9]+(?:\.[0-9]+)?e-?[0-9]+/}, // 123[.123]e[-]123 - {name: 'number', match: /(?:0|[1-9][0-9]*)?\.[0-9]+/}, // [123].123 - {name: 'number', match: /(?:0|[1-9][0-9]*)\.[0-9]*/}, // 123.[123] - {name: 'number', match: /0|[1-9][0-9]*/}, // 123 - {name: 'color', match: /#(?:[A-Fa-f0-9]{3}){2}/}, - {name: 'string', match: /"(?:\\["\\]|[^\n"\\])*"/}, // strings are backslash-escaped - {name: 'string', match: /'(?:\\['\\]|[^\n'\\])*'/}, - {name: 'lparen', match: /\(/}, - {name: 'rparen', match: /\)/}, - {name: 'langle', match: /\</}, - {name: 'rangle', match: /\>/}, - {name: 'lsquare', match: /\[/}, - {name: 'rsquare', match: /\]/}, - {name: 'cloud', match: /[☁]/}, - {name: 'input', match: /%[a-z](?:\.[a-zA-Z]+)?/}, - {name: 'symbol', match: /[_A-Za-z][-_A-Za-z0-9:',.]*/}, // word, as in a block - {name: 'iden', match: /[^\n \t"'()<>=*\/+-]+/}, // user-defined names - {name: 'NL', match: /\n/, lineBreaks: true }, - {name: 'ERROR', error: true}, + {type: 'symbol', match: Array.from('-%#+*/=^,?')}, // single character + {type: 'WS', match: /[ \t]+/}, + {type: 'ellips', match: /\.{3}/}, + {type: 'comment', match: /\/{2}.*$/}, + {type: 'false', match: /\<\>/}, + {type: 'zero', match: /\(\)/}, + {type: 'empty', match: /_(?: |$)/}, + {type: 'number', match: /[0-9]+(?:\.[0-9]+)?e-?[0-9]+/}, // 123[.123]e[-]123 + {type: 'number', match: /(?:0|[1-9][0-9]*)?\.[0-9]+/}, // [123].123 + {type: 'number', match: /(?:0|[1-9][0-9]*)\.[0-9]*/}, // 123.[123] + {type: 'number', match: /0|[1-9][0-9]*/}, // 123 + {type: 'color', match: /#(?:[A-Fa-f0-9]{3}){2}/}, + {type: 'string', match: /"(?:\\["\\]|[^\n"\\])*"/}, // strings are backslash-escaped + {type: 'string', match: /'(?:\\['\\]|[^\n'\\])*'/}, + {type: 'lparen', match: /\(/}, + {type: 'rparen', match: /\)/}, + {type: 'langle', match: /\</}, + {type: 'rangle', match: /\>/}, + {type: 'lsquare', match: /\[/}, + {type: 'rsquare', match: /\]/}, + {type: 'cloud', match: /[☁]/}, + {type: 'input', match: /%[a-z](?:\.[a-zA-Z]+)?/}, + {type: 'symbol', match: /[_A-Za-z][-_A-Za-z0-9:',.]*/}, // word, as in a
block + {type: 'iden', match: /[^\n \t"'()<>=*\/+-]+/}, // user-defined type + {type: 'NL', match: /\n/, lineBreaks: true }, + {type: 'ERROR', error: true}, ]) function tokenize(source) { From 1bfa7139f0dd1ed2cc56800d71163e4e25538f49 Mon Sep 17 00:00:00 2001 From: Tim Radvan Date: Sun, 16 Sep 2018 17:41:39 +0100 Subject: [PATCH 5/7] Remove keywords shorthand --- README.md | 10 +++++----- moo.js | 12 ------------ test/test.js | 31 +++++++++++-------------------- 3 files changed, 16 insertions(+), 37 deletions(-) diff --git a/README.md b/README.md index fa14472..e3b42e5 100644 --- a/README.md +++ b/README.md @@ -175,13 +175,13 @@ Moo makes it convenient to define literals. It'll automatically compile them into regular expressions, escaping them where necessary. -**Keywords** should be written using the `keywords` attribute. +**Keywords** should be written using the `keywords` transform. ```js moo.compile({ - IDEN: {match: /[a-zA-Z]+/, keywords: { + IDEN: {match: /[a-zA-Z]+/, type: moo.keywords({ KW: ['while', 'if', 'else', 'moo', 'cows'], - }}, + })}, SPACE: {match: /\s+/, lineBreaks: true}, }) ``` @@ -209,11 +209,11 @@ Keywords can also have **individual types**. ```js let lexer = moo.compile({ - name: {match: /[a-zA-Z]+/, keywords: { + name: {match: /[a-zA-Z]+/, type: moo.keywords({ 'kw-class': 'class', 'kw-def': 'def', 'kw-if': 'if', - }}, + })}, // ... 
}) lexer.reset('def foo') diff --git a/moo.js b/moo.js index 74bc2da..fa5053c 100644 --- a/moo.js +++ b/moo.js @@ -130,15 +130,6 @@ } } - // `keywords: obj` is shorthand for `type: moo.keywords(obj)` - if (options.keywords) { - // Warn if both keywords and type are set - if (options.type) { - throw new Error("Cannot have both keywords and type (for token '" + type + "')") - } - options.type = keywordTransform(options.keywords) - } - // type transform cannot be a string if (typeof options.type === 'string' && type !== options.type) { throw new Error("Type transform cannot be a string (type '" + options.type + "' for token '" + type + "')") @@ -576,9 +567,6 @@ for (var i = 0; i < groups.length; i++) { var group = groups[i] if (group.defaultType === tokenType) return true - if (group.keywords && hasOwnProperty.call(group.keywords, tokenType)) { - return true - } } } return false diff --git a/test/test.js b/test/test.js index 3938c62..cc7186e 100644 --- a/test/test.js +++ b/test/test.js @@ -304,10 +304,10 @@ describe('keywords', () => { } check(compile({ - identifier: {match: /[a-zA-Z]+/, keywords: {keyword: 'class'}}, + identifier: {match: /[a-zA-Z]+/, type: moo.keywords({keyword: 'class'})}, })) check(compile({ - identifier: {match: /[a-zA-Z]+/, keywords: {keyword: ['class']}}, + identifier: {match: /[a-zA-Z]+/, type: moo.keywords({keyword: ['class']})}, })) }) @@ -315,11 +315,11 @@ describe('keywords', () => { let lexer = compile({ identifier: { match: /[a-zA-Z]+/, - keywords: { + type: moo.keywords({ 'kw-class': 'class', 'kw-def': 'def', 'kw-if': 'if', - }, + }), }, space: {match: /\s+/, lineBreaks: true}, }) @@ -335,9 +335,9 @@ describe('keywords', () => { expect(() => compile({ identifier: { match: /[a-zA-Z]+/, - keywords: { + type: moo.keywords({ 'kw-class': {foo: 'bar'}, - }, + }), }, })).toThrow("keyword must be string (in keyword 'kw-class')") }) @@ -397,15 +397,6 @@ describe('type transforms', () => { expect(lexer.next()).toMatchObject({ type: 'identifier', 
value: 'cheese'}) }) - test('cannot set both type and keywords', () => { - expect(() => compile({ - identifier: { - type: () => 'moo', - keywords: {foo: 'keyword'}, - }, - })).toThrow("Cannot have both keywords and type (for token 'identifier')") - }) - test('cannot set type to a string', () => { expect(() => compile({ identifier: { @@ -526,7 +517,7 @@ describe('lexer', () => { // TODO: why does toString() return the value? const lexer = compile({ apples: 'a', - name: {match: /[a-z]/, keywords: { kw: ['m'] }}, + name: {match: /[a-z]/, type: moo.keywords({ kw: ['m'] })}, }).reset('azm') expect(String(lexer.next())).toBe('a') expect(String(lexer.next())).toBe('z') @@ -581,17 +572,17 @@ describe('Lexer#has', () => { const keywordLexer = compile({ identifier: { match: /[a-zA-Z]+/, - keywords: { + type: moo.keywords({ 'kw-class': 'class', 'kw-def': 'def', 'kw-if': 'if', - }, + }), }, }) - test('works with keywords', () => { + test("doesn't work with keywords", () => { expect(keywordLexer.has('identifier')).toBe(true) - expect(keywordLexer.has('kw-class')).toBe(true) + expect(keywordLexer.has('kw-class')).toBe(false) }) // Example from the readme. 
From 24b23ca961232df15f870f9c8db1c933f2a31e21 Mon Sep 17 00:00:00 2001 From: Tim Radvan Date: Sun, 16 Sep 2018 18:05:39 +0100 Subject: [PATCH 6/7] Deprecate Lexer#has --- moo.js | 11 +---------- test/test.js | 29 ++++++++--------------------- 2 files changed, 9 insertions(+), 31 deletions(-) diff --git a/moo.js b/moo.js index fa5053c..9e670be 100644 --- a/moo.js +++ b/moo.js @@ -560,16 +560,7 @@ } Lexer.prototype.has = function(tokenType) { - for (var s in this.states) { - var state = this.states[s] - if (state.error && state.error.defaultType === tokenType) return true - var groups = state.groups - for (var i = 0; i < groups.length; i++) { - var group = groups[i] - if (group.defaultType === tokenType) return true - } - } - return false + return true } diff --git a/test/test.js b/test/test.js index cc7186e..d5cc648 100644 --- a/test/test.js +++ b/test/test.js @@ -561,12 +561,8 @@ describe('Lexer#has', () => { expect(basicLexer.has('error')).toBe(true) }) - test('returns false for nonexistent junk', () => { - expect(basicLexer.has('random')).toBe(false) - }) - - test('returns false for stuff inherited from Object', () => { - expect(basicLexer.has('hasOwnProperty')).toBe(false) + test('returns true even for nonexistent junk', () => { + expect(basicLexer.has('random')).toBe(true) }) const keywordLexer = compile({ @@ -580,9 +576,8 @@ describe('Lexer#has', () => { }, }) - test("doesn't work with keywords", () => { - expect(keywordLexer.has('identifier')).toBe(true) - expect(keywordLexer.has('kw-class')).toBe(false) + test("returns true even for keywords", () => { + expect(keywordLexer.has('kw-class')).toBe(true) }) // Example from the readme. 
@@ -613,20 +608,12 @@ describe('Lexer#has', () => { expect(statefulLexer.has('interp')).toEqual(true) }) - test('works with error tokens - for first state', () => { - expect(statefulLexer.has('mainErr')).toEqual(true) - }) - - test('works with error tokens - for second state', () => { - expect(statefulLexer.has('litErr')).toEqual(true) - }) - - test('returns false for the state names themselves', () => { - expect(statefulLexer.has('main')).toEqual(false) + test('works with error tokens - for first state', () => { + expect(statefulLexer.has('mainErr')).toEqual(true) }) - test('returns false for stuff inherited from Object when using states', () => { - expect(statefulLexer.has('toString')).toEqual(false) + test('works with error tokens - for second state', () => { + expect(statefulLexer.has('litErr')).toEqual(true) }) }) From 97626323b90f7409be37b0765cf52b2296f52b38 Mon Sep 17 00:00:00 2001 From: Tim Radvan Date: Wed, 19 Sep 2018 13:24:27 +0100 Subject: [PATCH 7/7] Test that type transforms work with arrays --- test/test.js | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/test/test.js b/test/test.js index d5cc648..fb6b2ea 100644 --- a/test/test.js +++ b/test/test.js @@ -405,6 +405,23 @@ describe('type transforms', () => { })).toThrow("Type transform cannot be a string (type 'moo' for token 'identifier')") }) + test('can be used in an array', () => { + const lexer = compile([ + { type: (name) => 'word-' + name, match: /[a-z]+/}, + { type: 'space', match: / +/}, + ]) + lexer.reset('foo ') + expect(lexer.next()).toMatchObject({type: 'word-foo', value: 'foo'}) + expect(lexer.next()).toMatchObject({type: 'space', value: ' '}) + }) + + test('may result in questionable errors', () => { + const myTransform = function() {} + expect(() => compile([ + { type: myTransform, next: 'moo'}, + ])).toThrow("State-switching options are not allowed in stateless lexers (for token 'function () {}')") + }) + }) describe('value transforms', () => {