Initial commit
node_modules/moo/LICENSE (generated, vendored, new file: 29 lines)
@@ -0,0 +1,29 @@
BSD 3-Clause License

Copyright (c) 2017, Tim Radvan (tjvr)
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

* Redistributions of source code must retain the above copyright notice, this
  list of conditions and the following disclaimer.

* Redistributions in binary form must reproduce the above copyright notice,
  this list of conditions and the following disclaimer in the documentation
  and/or other materials provided with the distribution.

* Neither the name of the copyright holder nor the names of its
  contributors may be used to endorse or promote products derived from
  this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
node_modules/moo/README.md (generated, vendored, new file: 383 lines)
@@ -0,0 +1,383 @@
![](cow.png)

Moo!
====

Moo is a highly-optimised tokenizer/lexer generator. Use it to tokenize your strings, before parsing 'em with a parser like [nearley](https://github.com/hardmath123/nearley) or whatever else you're into.

* [Fast](#is-it-fast)
* [Convenient](#usage)
* uses [Regular Expressions](#on-regular-expressions)
* tracks [Line Numbers](#line-numbers)
* handles [Keywords](#keywords)
* supports [States](#states)
* custom [Errors](#errors)
* is even [Iterable](#iteration)
* has no dependencies
* 4KB minified + gzipped
* Moo!


Is it fast?
-----------

Yup! Flying-cows-and-singed-steak fast.

Moo is the fastest JS tokenizer around. It's **~2–10x** faster than most other tokenizers; it's a **couple of orders of magnitude** faster than some of the slower ones.

Define your tokens **using regular expressions**. Moo will compile 'em down to a **single RegExp for performance**. It uses the new ES6 **sticky flag** where possible to make things faster; otherwise it falls back to an almost-as-efficient workaround. (For more than you ever wanted to know about this, read [adventures in the land of substrings and RegExps](http://mrale.ph/blog/2016/11/23/making-less-dart-faster.html).)

You _might_ be able to go faster still by writing your lexer by hand rather than using RegExps, but that's icky.

Oh, and it [avoids parsing RegExps by itself](https://hackernoon.com/the-madness-of-parsing-real-world-javascript-regexps-d9ee336df983#.2l8qu3l76). Because that would be horrible.


Usage
-----

First, you need to do the needful: `$ npm install moo`, or whatever will ship this code to your computer. Alternatively, grab the `moo.js` file by itself and slap it into your web page via a `<script>` tag; moo is completely standalone.

Then you can start roasting your very own lexer/tokenizer:

```js
const moo = require('moo')

let lexer = moo.compile({
  WS:      /[ \t]+/,
  comment: /\/\/.*?$/,
  number:  /0|[1-9][0-9]*/,
  string:  /"(?:\\["\\]|[^\n"\\])*"/,
  lparen:  '(',
  rparen:  ')',
  keyword: ['while', 'if', 'else', 'moo', 'cows'],
  NL:      { match: /\n/, lineBreaks: true },
})
```

And now throw some text at it:

```js
lexer.reset('while (10) cows\nmoo')
lexer.next() // -> { type: 'keyword', value: 'while' }
lexer.next() // -> { type: 'WS', value: ' ' }
lexer.next() // -> { type: 'lparen', value: '(' }
lexer.next() // -> { type: 'number', value: '10' }
// ...
```

When you reach the end of Moo's internal buffer, `next()` will return `undefined`. You can always `reset()` it and feed it more data when that happens.
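
For instance (a minimal sketch, continuing with the lexer above):

```js
// ...once the remaining tokens have been consumed:
lexer.next() // -> undefined: end of the buffer

lexer.reset('moo moo') // feed it fresh input
lexer.next() // -> { type: 'keyword', value: 'moo' }
```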


On Regular Expressions
----------------------

RegExps are nifty for making tokenizers, but they can be a bit of a pain. Here are some things to be aware of:

* You often want to use **non-greedy quantifiers**: e.g. `*?` instead of `*`. Otherwise your tokens will be longer than you expect:

  ```js
  let lexer = moo.compile({
    string: /".*"/, // greedy quantifier *
    // ...
  })

  lexer.reset('"foo" "bar"')
  lexer.next() // -> { type: 'string', value: 'foo" "bar' }
  ```

  Better:

  ```js
  let lexer = moo.compile({
    string: /".*?"/, // non-greedy quantifier *?
    // ...
  })

  lexer.reset('"foo" "bar"')
  lexer.next() // -> { type: 'string', value: 'foo' }
  lexer.next() // -> { type: 'space', value: ' ' }
  lexer.next() // -> { type: 'string', value: 'bar' }
  ```

* The **order of your rules** matters. Earlier ones will take precedence.

  ```js
  moo.compile({
    identifier: /[a-z0-9]+/,
    number: /[0-9]+/,
  }).reset('42').next() // -> { type: 'identifier', value: '42' }

  moo.compile({
    number: /[0-9]+/,
    identifier: /[a-z0-9]+/,
  }).reset('42').next() // -> { type: 'number', value: '42' }
  ```

* Moo uses **multiline RegExps**. This has a few quirks: for example, the **dot `/./` doesn't include newlines**. Use `[^]` instead if you want to match newlines too.

* Since excluding character ranges like `/[^ ]/` (which matches anything but a space) _will_ include newlines, you have to be careful not to include them by accident! In particular, the whitespace metacharacter `\s` includes newlines. (Both gotchas are sketched below.)
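
A quick sketch of both gotchas (plain JavaScript RegExp behaviour, not specific to moo):

```js
/./.test('\n')   // false: the dot never matches a newline, even with the m flag
/[^]/.test('\n') // true: [^] matches any character, newlines included
/\s/.test('\n')  // true: so a rule like /\s+/ needs lineBreaks: true
```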


Line Numbers
------------

Moo tracks detailed information about the input for you.

It will track line numbers, as long as you **apply the `lineBreaks: true` option to any rules which might contain newlines**. Moo will try to warn you if you forget to do this.

Note that this is `false` by default, for performance reasons: counting the number of lines in a matched token has a small cost. For optimal performance, only match newlines inside a dedicated token:

```js
newline: {match: '\n', lineBreaks: true},
```


### Token Info ###

Token objects (returned from `next()`) have the following attributes:

* **`type`**: the name of the group, as passed to compile.
* **`text`**: the string that was matched.
* **`value`**: the string that was matched, transformed by your `value` function (if any).
* **`offset`**: the number of bytes from the start of the buffer where the match starts.
* **`lineBreaks`**: the number of line breaks found in the match. (Always zero if this rule has `lineBreaks: false`.)
* **`line`**: the line number of the beginning of the match, starting from 1.
* **`col`**: the column where the match begins, starting from 1.
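
For example (a small sketch, reusing the lexer from the Usage section):

```js
lexer.reset('while (10) cows\nmoo')
const tok = lexer.next()
console.log(tok.type)          // 'keyword'
console.log(tok.text)          // 'while' (tok.value is identical here: no value transform)
console.log(tok.offset)        // 0
console.log(tok.line, tok.col) // 1 1
```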


### Value vs. Text ###

The `value` is the same as the `text`, unless you provide a [value transform](#transform).

```js
const moo = require('moo')

const lexer = moo.compile({
  ws: /[ \t]+/,
  string: {match: /"(?:\\["\\]|[^\n"\\])*"/, value: s => s.slice(1, -1)},
})

lexer.reset('"test"')
lexer.next() /* { value: 'test', text: '"test"', ... } */
```


### Reset ###

Calling `reset()` on your lexer will empty its internal buffer, and set the line, column, and offset counts back to their initial value.

If you don't want this, you can `save()` the state, and later pass it as the second argument to `reset()` to explicitly control the internal state of the lexer.

```js
lexer.reset('some line\n')
let info = lexer.save() // -> { line: 10 }
lexer.next() // -> { line: 10 }
lexer.next() // -> { line: 11 }
// ...
lexer.reset('a different line\n', info)
lexer.next() // -> { line: 10 }
```


Keywords
--------

Moo makes it convenient to define literals.

```js
moo.compile({
  lparen:  '(',
  rparen:  ')',
  keyword: ['while', 'if', 'else', 'moo', 'cows'],
})
```

It'll automatically compile them into regular expressions, escaping them where necessary.
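
For instance (a sketch with hypothetical `op` and `word` rules; `+`, `*` and `?` are RegExp metacharacters, but are safe to list as literals):

```js
let lexer = moo.compile({
  op:   ['+', '*', '?'], // compiled with the metacharacters escaped for you
  word: /[a-z]+/,
})
lexer.reset('a+b')
lexer.next() // -> { type: 'word', value: 'a' }
lexer.next() // -> { type: 'op', value: '+' }
lexer.next() // -> { type: 'word', value: 'b' }
```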

**Keywords** should be written using the `keywords` transform.

```js
moo.compile({
  IDEN: {match: /[a-zA-Z]+/, type: moo.keywords({
    KW: ['while', 'if', 'else', 'moo', 'cows'],
  })},
  SPACE: {match: /\s+/, lineBreaks: true},
})
```


### Why? ###

You need to do this to ensure the **longest match** principle applies, even in edge cases.

Imagine trying to parse the input `className` with the following rules:

```js
keyword: ['class'],
identifier: /[a-zA-Z]+/,
```

You'll get _two_ tokens — `['class', 'Name']` — which is _not_ what you want! If you swap the order of the rules, you'll fix this example; but now you'll lex `class` wrong (as an `identifier`).

The keywords helper checks matches against the list of keywords; if any of them match, it uses the type `'keyword'` instead of `'identifier'` (for this example).
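
A sketch of the fixed behaviour, reusing the `IDEN`/`KW` pattern above (trimmed to just `class`; the `WS` rule is added so the input lexes):

```js
let lexer = moo.compile({
  IDEN: {match: /[a-zA-Z]+/, type: moo.keywords({
    KW: ['class'],
  })},
  WS: /[ \t]+/,
})
lexer.reset('className class')
lexer.next() // -> { type: 'IDEN', value: 'className' } (one token: longest match)
lexer.next() // -> { type: 'WS', value: ' ' }
lexer.next() // -> { type: 'KW', value: 'class' }
```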


### Keyword Types ###

Keywords can also have **individual types**.

```js
let lexer = moo.compile({
  name: {match: /[a-zA-Z]+/, type: moo.keywords({
    'kw-class': 'class',
    'kw-def': 'def',
    'kw-if': 'if',
  })},
  // ...
})
lexer.reset('def foo')
lexer.next() // -> { type: 'kw-def', value: 'def' }
lexer.next() // space
lexer.next() // -> { type: 'name', value: 'foo' }
```

You can use `Object.fromEntries` to easily construct keyword objects:

```js
Object.fromEntries(['class', 'def', 'if'].map(k => ['kw-' + k, k]))
```


States
------

Moo allows you to define multiple lexer **states**. Each state defines its own separate set of token rules. Your lexer will start off in the first state given to `moo.states({})`.

Rules can be annotated with `next`, `push`, and `pop`, to change the current state after that token is matched. A "stack" of past states is kept, which is used by `push` and `pop`.

* **`next: 'bar'`** moves to the state named `bar`. (The stack is not changed.)
* **`push: 'bar'`** moves to the state named `bar`, and pushes the old state onto the stack.
* **`pop: 1`** removes one state from the top of the stack, and moves to that state. (Only `1` is supported.)

Only rules from the current state can be matched. You need to copy your rule into all the states you want it to be matched in.

For example, to tokenize JS-style string interpolation such as `` `a${{c: d}}e` ``, you might use:

```js
let lexer = moo.states({
  main: {
    strstart: {match: '`', push: 'lit'},
    ident:    /\w+/,
    lbrace:   {match: '{', push: 'main'},
    rbrace:   {match: '}', pop: 1},
    colon:    ':',
    space:    {match: /\s+/, lineBreaks: true},
  },
  lit: {
    interp:   {match: '${', push: 'main'},
    escape:   /\\./,
    strend:   {match: '`', pop: 1},
    const:    {match: /(?:[^$`]|\$(?!\{))+/, lineBreaks: true},
  },
})
// <= `a${{c: d}}e`
// => strstart const interp lbrace ident colon space ident rbrace rbrace const strend
```

The `rbrace` rule is annotated with `pop`, so it moves from the `main` state into either `lit` or `main`, depending on the stack.


Errors
------

If none of your rules match, Moo will throw an Error, since it doesn't know what else to do.

If you prefer, you can have moo return an error token instead of throwing an exception. The error token will contain the whole of the rest of the buffer.

```js
let lexer = moo.compile({
  // ...
  myError: moo.error,
})

lexer.reset('invalid')
lexer.next() // -> { type: 'myError', value: 'invalid', text: 'invalid', offset: 0, lineBreaks: 0, line: 1, col: 1 }
lexer.next() // -> undefined
```

You can have a token type that both matches tokens _and_ contains error values.

```js
moo.compile({
  // ...
  myError: {match: /[\$?`]/, error: true},
})
```

### Formatting errors ###

If you want to throw an error from your parser, you might find `formatError` helpful. Call it with the offending token:

```js
throw new Error(lexer.formatError(token, "invalid syntax"))
```

It returns a string with a pretty error message.

```
Error: invalid syntax at line 2 col 15:

  totally valid `syntax`
                ^
```


Iteration
---------

Iterators: we got 'em.

```js
for (let here of lexer) {
  // here = { type: 'number', value: '123', ... }
}
```

Create an array of tokens.

```js
let tokens = Array.from(lexer);
```

Use [itt](https://www.npmjs.com/package/itt)'s iteration tools with Moo.

```js
for (let [here, next] of itt(lexer).lookahead()) { // pass a number if you need more tokens
  // enjoy!
}
```


Transform
---------

Moo doesn't allow capturing groups, but you can supply a transform function, `value()`, which will be called on the value before storing it in the Token object.

```js
moo.compile({
  STRING: [
    {match: /"""[^]*?"""/, lineBreaks: true, value: x => x.slice(3, -3)},
    {match: /"(?:\\["\\rn]|[^"\\])*?"/, lineBreaks: true, value: x => x.slice(1, -1)},
    {match: /'(?:\\['\\rn]|[^'\\])*?'/, lineBreaks: true, value: x => x.slice(1, -1)},
  ],
  // ...
})
```


Contributing
------------

Do check the [FAQ](https://github.com/tjvr/moo/issues?q=label%3Aquestion).

Before submitting an issue, [remember...](https://github.com/tjvr/moo/blob/master/.github/CONTRIBUTING.md)
node_modules/moo/moo.js (generated, vendored, new file: 642 lines)
@@ -0,0 +1,642 @@
(function(root, factory) {
  if (typeof define === 'function' && define.amd) {
    define([], factory) /* global define */
  } else if (typeof module === 'object' && module.exports) {
    module.exports = factory()
  } else {
    root.moo = factory()
  }
}(this, function() {
  'use strict';

  var hasOwnProperty = Object.prototype.hasOwnProperty
  var toString = Object.prototype.toString
  var hasSticky = typeof new RegExp().sticky === 'boolean'

  /***************************************************************************/

  function isRegExp(o) { return o && toString.call(o) === '[object RegExp]' }
  function isObject(o) { return o && typeof o === 'object' && !isRegExp(o) && !Array.isArray(o) }

  function reEscape(s) {
    return s.replace(/[-\/\\^$*+?.()|[\]{}]/g, '\\$&')
  }
  function reGroups(s) {
    var re = new RegExp('|' + s)
    return re.exec('').length - 1
  }
  function reCapture(s) {
    return '(' + s + ')'
  }
  function reUnion(regexps) {
    if (!regexps.length) return '(?!)'
    var source = regexps.map(function(s) {
      return "(?:" + s + ")"
    }).join('|')
    return "(?:" + source + ")"
  }

  function regexpOrLiteral(obj) {
    if (typeof obj === 'string') {
      return '(?:' + reEscape(obj) + ')'

    } else if (isRegExp(obj)) {
      // TODO: consider /u support
      if (obj.ignoreCase) throw new Error('RegExp /i flag not allowed')
      if (obj.global) throw new Error('RegExp /g flag is implied')
      if (obj.sticky) throw new Error('RegExp /y flag is implied')
      if (obj.multiline) throw new Error('RegExp /m flag is implied')
      return obj.source

    } else {
      throw new Error('Not a pattern: ' + obj)
    }
  }

  function pad(s, length) {
    if (s.length > length) {
      return s
    }
    return Array(length - s.length + 1).join(" ") + s
  }

  function lastNLines(string, numLines) {
    var position = string.length
    var lineBreaks = 0;
    while (true) {
      var idx = string.lastIndexOf("\n", position - 1)
      if (idx === -1) {
        break;
      } else {
        lineBreaks++
      }
      position = idx
      if (lineBreaks === numLines) {
        break;
      }
      if (position === 0) {
        break;
      }
    }
    var startPosition =
      lineBreaks < numLines ?
      0 :
      position + 1
    return string.substring(startPosition).split("\n")
  }

  function objectToRules(object) {
    var keys = Object.getOwnPropertyNames(object)
    var result = []
    for (var i = 0; i < keys.length; i++) {
      var key = keys[i]
      var thing = object[key]
      var rules = [].concat(thing)
      if (key === 'include') {
        for (var j = 0; j < rules.length; j++) {
          result.push({include: rules[j]})
        }
        continue
      }
      var match = []
      rules.forEach(function(rule) {
        if (isObject(rule)) {
          if (match.length) result.push(ruleOptions(key, match))
          result.push(ruleOptions(key, rule))
          match = []
        } else {
          match.push(rule)
        }
      })
      if (match.length) result.push(ruleOptions(key, match))
    }
    return result
  }

  function arrayToRules(array) {
    var result = []
    for (var i = 0; i < array.length; i++) {
      var obj = array[i]
      if (obj.include) {
        var include = [].concat(obj.include)
        for (var j = 0; j < include.length; j++) {
          result.push({include: include[j]})
        }
        continue
      }
      if (!obj.type) {
        throw new Error('Rule has no type: ' + JSON.stringify(obj))
      }
      result.push(ruleOptions(obj.type, obj))
    }
    return result
  }

  function ruleOptions(type, obj) {
    if (!isObject(obj)) {
      obj = { match: obj }
    }
    if (obj.include) {
      throw new Error('Matching rules cannot also include states')
    }

    // nb. error and fallback imply lineBreaks
    var options = {
      defaultType: type,
      lineBreaks: !!obj.error || !!obj.fallback,
      pop: false,
      next: null,
      push: null,
      error: false,
      fallback: false,
      value: null,
      type: null,
      shouldThrow: false,
    }

    // Avoid Object.assign(), so we support IE9+
    for (var key in obj) {
      if (hasOwnProperty.call(obj, key)) {
        options[key] = obj[key]
      }
    }

    // type transform cannot be a string
    if (typeof options.type === 'string' && type !== options.type) {
      throw new Error("Type transform cannot be a string (type '" + options.type + "' for token '" + type + "')")
    }

    // convert to array
    var match = options.match
    options.match = Array.isArray(match) ? match : match ? [match] : []
    options.match.sort(function(a, b) {
      return isRegExp(a) && isRegExp(b) ? 0
           : isRegExp(b) ? -1 : isRegExp(a) ? +1 : b.length - a.length
    })
    return options
  }

  function toRules(spec) {
    return Array.isArray(spec) ? arrayToRules(spec) : objectToRules(spec)
  }

  var defaultErrorRule = ruleOptions('error', {lineBreaks: true, shouldThrow: true})
  function compileRules(rules, hasStates) {
    var errorRule = null
    var fast = Object.create(null)
    var fastAllowed = true
    var unicodeFlag = null
    var groups = []
    var parts = []

    // If there is a fallback rule, then disable fast matching
    for (var i = 0; i < rules.length; i++) {
      if (rules[i].fallback) {
        fastAllowed = false
      }
    }

    for (var i = 0; i < rules.length; i++) {
      var options = rules[i]

      if (options.include) {
        // all valid inclusions are removed by states() preprocessor
        throw new Error('Inheritance is not allowed in stateless lexers')
      }

      if (options.error || options.fallback) {
        // errorRule can only be set once
        if (errorRule) {
          if (!options.fallback === !errorRule.fallback) {
            throw new Error("Multiple " + (options.fallback ? "fallback" : "error") + " rules not allowed (for token '" + options.defaultType + "')")
          } else {
            throw new Error("fallback and error are mutually exclusive (for token '" + options.defaultType + "')")
          }
        }
        errorRule = options
      }

      var match = options.match.slice()
      if (fastAllowed) {
        while (match.length && typeof match[0] === 'string' && match[0].length === 1) {
          var word = match.shift()
          fast[word.charCodeAt(0)] = options
        }
      }

      // Warn about inappropriate state-switching options
      if (options.pop || options.push || options.next) {
        if (!hasStates) {
          throw new Error("State-switching options are not allowed in stateless lexers (for token '" + options.defaultType + "')")
        }
        if (options.fallback) {
          throw new Error("State-switching options are not allowed on fallback tokens (for token '" + options.defaultType + "')")
        }
      }

      // Only rules with a .match are included in the RegExp
      if (match.length === 0) {
        continue
      }
      fastAllowed = false

      groups.push(options)

      // Check unicode flag is used everywhere or nowhere
      for (var j = 0; j < match.length; j++) {
        var obj = match[j]
        if (!isRegExp(obj)) {
          continue
        }

        if (unicodeFlag === null) {
          unicodeFlag = obj.unicode
        } else if (unicodeFlag !== obj.unicode && options.fallback === false) {
          throw new Error('If one rule is /u then all must be')
        }
      }

      // convert to RegExp
      var pat = reUnion(match.map(regexpOrLiteral))

      // validate
      var regexp = new RegExp(pat)
      if (regexp.test("")) {
        throw new Error("RegExp matches empty string: " + regexp)
      }
      var groupCount = reGroups(pat)
      if (groupCount > 0) {
        throw new Error("RegExp has capture groups: " + regexp + "\nUse (?: … ) instead")
      }

      // try and detect rules matching newlines
      if (!options.lineBreaks && regexp.test('\n')) {
        throw new Error('Rule should declare lineBreaks: ' + regexp)
      }

      // store regex
      parts.push(reCapture(pat))
    }


    // If there's no fallback rule, use the sticky flag so we only look for
    // matches at the current index.
    //
    // If we don't support the sticky flag, then fake it using an irrefutable
    // match (i.e. an empty pattern).
    var fallbackRule = errorRule && errorRule.fallback
    var flags = hasSticky && !fallbackRule ? 'ym' : 'gm'
    var suffix = hasSticky || fallbackRule ? '' : '|'

    if (unicodeFlag === true) flags += "u"
    var combined = new RegExp(reUnion(parts) + suffix, flags)
    return {regexp: combined, groups: groups, fast: fast, error: errorRule || defaultErrorRule}
  }

  function compile(rules) {
    var result = compileRules(toRules(rules))
    return new Lexer({start: result}, 'start')
  }

  function checkStateGroup(g, name, map) {
    var state = g && (g.push || g.next)
    if (state && !map[state]) {
      throw new Error("Missing state '" + state + "' (in token '" + g.defaultType + "' of state '" + name + "')")
    }
    if (g && g.pop && +g.pop !== 1) {
      throw new Error("pop must be 1 (in token '" + g.defaultType + "' of state '" + name + "')")
    }
  }
  function compileStates(states, start) {
    var all = states.$all ? toRules(states.$all) : []
    delete states.$all

    var keys = Object.getOwnPropertyNames(states)
    if (!start) start = keys[0]

    var ruleMap = Object.create(null)
    for (var i = 0; i < keys.length; i++) {
      var key = keys[i]
      ruleMap[key] = toRules(states[key]).concat(all)
    }
    for (var i = 0; i < keys.length; i++) {
      var key = keys[i]
      var rules = ruleMap[key]
      var included = Object.create(null)
      for (var j = 0; j < rules.length; j++) {
        var rule = rules[j]
        if (!rule.include) continue
        var splice = [j, 1]
        if (rule.include !== key && !included[rule.include]) {
          included[rule.include] = true
          var newRules = ruleMap[rule.include]
          if (!newRules) {
            throw new Error("Cannot include nonexistent state '" + rule.include + "' (in state '" + key + "')")
          }
          for (var k = 0; k < newRules.length; k++) {
            var newRule = newRules[k]
            if (rules.indexOf(newRule) !== -1) continue
            splice.push(newRule)
          }
        }
        rules.splice.apply(rules, splice)
        j--
      }
    }

    var map = Object.create(null)
    for (var i = 0; i < keys.length; i++) {
      var key = keys[i]
      map[key] = compileRules(ruleMap[key], true)
    }

    for (var i = 0; i < keys.length; i++) {
      var name = keys[i]
      var state = map[name]
      var groups = state.groups
      for (var j = 0; j < groups.length; j++) {
        checkStateGroup(groups[j], name, map)
      }
      var fastKeys = Object.getOwnPropertyNames(state.fast)
      for (var j = 0; j < fastKeys.length; j++) {
        checkStateGroup(state.fast[fastKeys[j]], name, map)
      }
    }

    return new Lexer(map, start)
  }

  function keywordTransform(map) {

    // Use a JavaScript Map to map keywords to their corresponding token type
    // unless Map is unsupported, then fall back to using an Object:
    var isMap = typeof Map !== 'undefined'
    var reverseMap = isMap ? new Map : Object.create(null)

    var types = Object.getOwnPropertyNames(map)
    for (var i = 0; i < types.length; i++) {
      var tokenType = types[i]
      var item = map[tokenType]
      var keywordList = Array.isArray(item) ? item : [item]
      keywordList.forEach(function(keyword) {
        if (typeof keyword !== 'string') {
          throw new Error("keyword must be string (in keyword '" + tokenType + "')")
        }
        if (isMap) {
          reverseMap.set(keyword, tokenType)
        } else {
          reverseMap[keyword] = tokenType
        }
      })
    }
    return function(k) {
      return isMap ? reverseMap.get(k) : reverseMap[k]
    }
  }

  /***************************************************************************/

  var Lexer = function(states, state) {
    this.startState = state
    this.states = states
    this.buffer = ''
    this.stack = []
    this.reset()
  }

  Lexer.prototype.reset = function(data, info) {
    this.buffer = data || ''
    this.index = 0
    this.line = info ? info.line : 1
    this.col = info ? info.col : 1
    this.queuedToken = info ? info.queuedToken : null
    this.queuedText = info ? info.queuedText: "";
    this.queuedThrow = info ? info.queuedThrow : null
    this.setState(info ? info.state : this.startState)
    this.stack = info && info.stack ? info.stack.slice() : []
    return this
  }

  Lexer.prototype.save = function() {
    return {
      line: this.line,
      col: this.col,
      state: this.state,
      stack: this.stack.slice(),
      queuedToken: this.queuedToken,
      queuedText: this.queuedText,
      queuedThrow: this.queuedThrow,
    }
  }

  Lexer.prototype.setState = function(state) {
    if (!state || this.state === state) return
    this.state = state
    var info = this.states[state]
    this.groups = info.groups
    this.error = info.error
    this.re = info.regexp
    this.fast = info.fast
  }

  Lexer.prototype.popState = function() {
    this.setState(this.stack.pop())
  }

  Lexer.prototype.pushState = function(state) {
    this.stack.push(this.state)
    this.setState(state)
  }

  var eat = hasSticky ? function(re, buffer) { // assume re is /y
    return re.exec(buffer)
  } : function(re, buffer) { // assume re is /g
    var match = re.exec(buffer)
    // will always match, since we used the |(?:) trick
    if (match[0].length === 0) {
      return null
    }
    return match
  }

  Lexer.prototype._getGroup = function(match) {
    var groupCount = this.groups.length
    for (var i = 0; i < groupCount; i++) {
      if (match[i + 1] !== undefined) {
        return this.groups[i]
      }
    }
    throw new Error('Cannot find token type for matched text')
  }

  function tokenToString() {
    return this.value
  }

  Lexer.prototype.next = function() {
    var index = this.index

    // If a fallback token matched, we don't need to re-run the RegExp
    if (this.queuedGroup) {
      var token = this._token(this.queuedGroup, this.queuedText, index)
      this.queuedGroup = null
      this.queuedText = ""
      return token
    }

    var buffer = this.buffer
    if (index === buffer.length) {
      return // EOF
    }

    // Fast matching for single characters
    var group = this.fast[buffer.charCodeAt(index)]
    if (group) {
      return this._token(group, buffer.charAt(index), index)
    }

    // Execute RegExp
    var re = this.re
    re.lastIndex = index
    var match = eat(re, buffer)

    // Error tokens match the remaining buffer
    var error = this.error
    if (match == null) {
      return this._token(error, buffer.slice(index, buffer.length), index)
    }

    var group = this._getGroup(match)
    var text = match[0]

    if (error.fallback && match.index !== index) {
      this.queuedGroup = group
      this.queuedText = text

      // Fallback tokens contain the unmatched portion of the buffer
      return this._token(error, buffer.slice(index, match.index), index)
    }

    return this._token(group, text, index)
  }

  Lexer.prototype._token = function(group, text, offset) {
    // count line breaks
    var lineBreaks = 0
    if (group.lineBreaks) {
      var matchNL = /\n/g
      var nl = 1
      if (text === '\n') {
        lineBreaks = 1
      } else {
        while (matchNL.exec(text)) { lineBreaks++; nl = matchNL.lastIndex }
      }
    }

    var token = {
      type: (typeof group.type === 'function' && group.type(text)) || group.defaultType,
      value: typeof group.value === 'function' ? group.value(text) : text,
      text: text,
      toString: tokenToString,
      offset: offset,
      lineBreaks: lineBreaks,
      line: this.line,
      col: this.col,
    }
    // nb. adding more props to token object will make V8 sad!

    var size = text.length
    this.index += size
    this.line += lineBreaks
    if (lineBreaks !== 0) {
      this.col = size - nl + 1
    } else {
      this.col += size
    }

    // throw, if no rule with {error: true}
    if (group.shouldThrow) {
      var err = new Error(this.formatError(token, "invalid syntax"))
      throw err;
    }

    if (group.pop) this.popState()
    else if (group.push) this.pushState(group.push)
    else if (group.next) this.setState(group.next)

    return token
  }

  if (typeof Symbol !== 'undefined' && Symbol.iterator) {
    var LexerIterator = function(lexer) {
      this.lexer = lexer
    }

    LexerIterator.prototype.next = function() {
      var token = this.lexer.next()
      return {value: token, done: !token}
    }

    LexerIterator.prototype[Symbol.iterator] = function() {
      return this
    }

    Lexer.prototype[Symbol.iterator] = function() {
      return new LexerIterator(this)
    }
  }

  Lexer.prototype.formatError = function(token, message) {
    if (token == null) {
      // An undefined token indicates EOF
      var text = this.buffer.slice(this.index)
      var token = {
        text: text,
        offset: this.index,
        lineBreaks: text.indexOf('\n') === -1 ? 0 : 1,
        line: this.line,
        col: this.col,
      }
    }

    var numLinesAround = 2
    var firstDisplayedLine = Math.max(token.line - numLinesAround, 1)
    var lastDisplayedLine = token.line + numLinesAround
    var lastLineDigits = String(lastDisplayedLine).length
    var displayedLines = lastNLines(
        this.buffer,
        (this.line - token.line) + numLinesAround + 1
      )
      .slice(0, 5)
    var errorLines = []
    errorLines.push(message + " at line " + token.line + " col " + token.col + ":")
    errorLines.push("")
    for (var i = 0; i < displayedLines.length; i++) {
      var line = displayedLines[i]
      var lineNo = firstDisplayedLine + i
      errorLines.push(pad(String(lineNo), lastLineDigits) + " " + line);
      if (lineNo === token.line) {
        errorLines.push(pad("", lastLineDigits + token.col + 1) + "^")
      }
    }
    return errorLines.join("\n")
  }

  Lexer.prototype.clone = function() {
    return new Lexer(this.states, this.state)
  }

  Lexer.prototype.has = function(tokenType) {
    return true
  }


  return {
    compile: compile,
    states: compileStates,
    error: Object.freeze({error: true}),
    fallback: Object.freeze({fallback: true}),
    keywords: keywordTransform,
  }

}));
node_modules/moo/package.json (generated, vendored, new file: 29 lines)
@@ -0,0 +1,29 @@
{
  "name": "moo",
  "version": "0.5.2",
  "description": "Optimised tokenizer/lexer generator! 🐄 Much performance. Moo!",
  "main": "moo.js",
  "files": [
    "moo.js"
  ],
  "repository": "https://github.com/tjvr/moo.git",
  "author": "Tim Radvan <tim@tjvr.org>",
  "contributors": [
    "Nathan"
  ],
  "license": "BSD-3-Clause",
  "scripts": {
    "test": "jest .",
    "benchmark": "benchr test/benchmark.js",
    "moo": "echo 'Mooooo!'"
  },
  "devDependencies": {
    "benchr": "^3.2.0",
    "chevrotain": "4.2.0",
    "jest": "24.7.1",
    "lex": "^1.7.9",
    "lexing": "^0.8.0",
    "remix": "^0.1.4",
    "tokenizer2": "^2.0.0"
  }
}