Add some parsing stuff to finished/ for Tag

This commit is contained in:
Richard Feldman
2018-05-04 19:30:58 -04:00
parent 82e848ba97
commit 2bd0c78583
24 changed files with 2565 additions and 48 deletions

View File

@@ -0,0 +1 @@
elm-stuff

View File

@@ -0,0 +1,27 @@
Copyright (c) 2017-present, Evan Czaplicki
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of the {organization} nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@@ -0,0 +1,7 @@
# Parser Primitives
**In 99.9999% of cases, you do not want this.**
When creating a parser combinator library like [`elm-tools/parser`](https://github.com/elm-tools/parser), you want lower-level access to strings to get better performance.
This package exposes these low-level functions so that `elm-tools/parser` does not have an unfair performance advantage.

View File

@@ -0,0 +1,17 @@
{
"version": "1.0.0",
"summary": "Fast (but safe) primitives for creating parsing packages",
"repository": "https://github.com/elm-tools/parser-primitives.git",
"license": "BSD-3-Clause",
"source-directories": [
"src"
],
"exposed-modules": [
"ParserPrimitives"
],
"dependencies": {
"elm-lang/core": "5.0.0 <= v < 6.0.0"
},
"native-modules": true,
"elm-version": "0.18.0 <= v < 0.19.0"
}

View File

@@ -0,0 +1,130 @@
var _elm_tools$parser_primitives$Native_ParserPrimitives = function() {
// STRINGS
// Check whether `smallString` appears in `bigString` exactly at `offset`,
// tracking the row/column position across the matched text.
//
//   smallString : the text to look for
//   offset      : index (in UTF-16 code units) into bigString where the match must start
//   row, col    : position corresponding to `offset`
//   bigString   : the full source text
//
// Returns tuple3(newOffset, newRow, newCol). On success newOffset is
// `offset + smallString.length`; on failure newOffset is -1 and the returned
// row/col may be partially advanced, so callers should ignore them.
//
// A newline moves to the next row and resets col to 1. A surrogate pair
// (two UTF-16 code units) advances col by one, not two.
function isSubString(smallString, offset, row, col, bigString)
{
    var smallLength = smallString.length;
    var bigLength = bigString.length - offset;

    // not enough input left to hold smallString
    if (bigLength < smallLength)
    {
        return tuple3(-1, row, col);
    }

    for (var i = 0; i < smallLength; i++)
    {
        var char = smallString[i];

        if (char !== bigString[offset + i])
        {
            return tuple3(-1, row, col);
        }

        // if it is a two word character
        // (inspect the code unit at the CURRENT position, offset + i; looking
        // only at `offset` would classify every character by the first one)
        if ((bigString.charCodeAt(offset + i) & 0xF800) === 0xD800)
        {
            // compare the second half of the pair, then count one column
            i++;
            if (smallString[i] !== bigString[offset + i])
            {
                return tuple3(-1, row, col);
            }
            col++;
            continue;
        }

        // if it is a newline
        if (char === '\n')
        {
            row++;
            col = 1;
            continue;
        }

        // if it is a one word character
        col++;
    }

    return tuple3(offset + smallLength, row, col);
}
// Package three values as an object tagged '_Tuple3', with the values stored
// in the fields _0, _1 and _2 (in that order).
function tuple3(a, b, c)
{
    var triple = { ctor: '_Tuple3' };
    triple._0 = a;
    triple._1 = b;
    triple._2 = c;
    return triple;
}
// CHARS
// Local alias for the core runtime's `chr` helper. isSubChar applies it to
// each string slice before handing the result to the predicate.
// NOTE(review): presumably `chr` tags a JS string as an Elm Char — confirm
// against elm-lang/core's Native/Utils.js.
var mkChar = _elm_lang$core$Native_Utils.chr;
// Test the single character of `string` that starts at `offset` against
// `predicate`.
//
// Returns:
//   -1          when offset is at/past the end, or the predicate rejects
//   -2          when the predicate accepts a newline
//   offset + 2  when the predicate accepts a two word (surrogate pair) character
//   offset + 1  when the predicate accepts any other character
function isSubChar(predicate, offset, string)
{
    // nothing left to inspect
    if (string.length <= offset)
    {
        return -1;
    }

    var startsPair = (string.charCodeAt(offset) & 0xF800) === 0xD800;

    if (startsPair)
    {
        // hand both code units to the predicate as one character
        var pair = string.substr(offset, 2);
        if (predicate(mkChar(pair)))
        {
            return offset + 2;
        }
        return -1;
    }

    var unit = string[offset];

    if (!predicate(mkChar(unit)))
    {
        return -1;
    }

    // newlines are reported with a sentinel so the caller can bump the row
    if (unit === '\n')
    {
        return -2;
    }

    return offset + 1;
}
// FIND STRING
// Locate the first occurrence of `smallString` in `bigString` at or after
// `offset`, and compute the row/col of the resulting position.
//
//   before : when truthy, stop at the start of the match; otherwise stop
//            just past its end
//
// Returns tuple3(newOffset, newRow, newCol). When there is no occurrence the
// result is tuple3(-1, row, col) with row and col exactly as passed in.
function findSubString(before, smallString, offset, row, col, bigString)
{
    var matchStart = bigString.indexOf(smallString, offset);

    if (matchStart < 0)
    {
        return tuple3(-1, row, col);
    }

    var stopAt = matchStart + (before ? 0 : smallString.length);
    var pos = offset;

    // walk from the starting offset to the stop position, updating row/col
    while (pos < stopAt)
    {
        var code = bigString.charCodeAt(pos);

        if (code === 0x000A) // '\n'
        {
            row += 1;
            col = 1;
            pos += 1;
        }
        else
        {
            // a surrogate pair takes two code units but only one column
            pos += ((code & 0xF800) === 0xD800) ? 2 : 1;
            col += 1;
        }
    }

    return tuple3(pos, row, col);
}
// Public interface of this native module: the three primitives, each wrapped
// with the F<n> helper matching its parameter count.
// NOTE(review): F3/F5/F6 are defined outside this file — presumably the Elm
// runtime's currying wrappers; confirm.
return {
isSubString: F5(isSubString),
isSubChar: F3(isSubChar),
findSubString: F6(findSubString)
};
}();

View File

@@ -0,0 +1,109 @@
module ParserPrimitives exposing
( isSubString
, isSubChar
, findSubString
)
{-| Low-level functions for creating parser combinator libraries.
@docs isSubString, isSubChar, findSubString
-}
import Native.ParserPrimitives
-- STRINGS
{-| When making a fast parser, you want to avoid allocation as much as
possible. That means you never want to mess with the source string, only
keep track of an offset into that string.

You use `isSubString` like this:

    isSubString "let" offset row col "let x = 4 in x"
        --==> ( newOffset, newRow, newCol )

You are looking for `"let"` at a given `offset`. On failure, the
`newOffset` is `-1`. On success, the `newOffset` is the offset just past
the match. With our `"let"` example, it would be `offset + 3`.

You also provide the current `row` and `col` which do not align with
`offset` in a clean way. For example, when you see a `\n` you are at
`row = row + 1` and `col = 1`. Furthermore, some UTF16 characters are
two words wide, so even if there are no newlines, `offset` and `col`
may not be equal.

**Note:** only use `newRow` and `newCol` when `newOffset` is not `-1`.
On failure they may have been partially advanced before the mismatch
was found.
-}
isSubString : String -> Int -> Int -> Int -> String -> (Int, Int, Int)
isSubString =
Native.ParserPrimitives.isSubString
-- CHARACTERS
{-| Again, when parsing, you want to allocate as little as possible.
So this function lets you say:

    isSubChar isSpace offset "this is the source string"
        --==> newOffset

The `(Char -> Bool)` argument is called a predicate.
The `newOffset` value can be a few different things:

  - `-1` means that the predicate failed, or that `offset` is at or past
    the end of the string
  - `-2` means the predicate succeeded with a `\n`
  - otherwise you will get `offset + 1` or `offset + 2`
    depending on whether the UTF16 character is one or two
    words wide.

It is better to use union types in general, but it is worth the
danger *within* parsing libraries to get the benefit *outside*.

So you can write a `chomp` function like this:

    chomp : (Char -> Bool) -> Int -> Int -> Int -> String -> (Int, Int, Int)
    chomp isGood offset row col source =
      let
        newOffset =
          Prim.isSubChar isGood offset source
      in
        -- no match
        if newOffset == -1 then
          (offset, row, col)

        -- newline match
        else if newOffset == -2 then
          chomp isGood (offset + 1) (row + 1) 1 source

        -- normal match
        else
          chomp isGood newOffset row (col + 1) source

Notice that `chomp` can be tail-call optimized, so this turns into a
`while` loop under the hood.
-}
isSubChar : (Char -> Bool) -> Int -> String -> Int
isSubChar =
Native.ParserPrimitives.isSubChar
-- INDEX
{-| Find a substring at or after a given offset.

    findSubString before "42" offset row col "Is 42 the answer?"
        --==> (newOffset, newRow, newCol)

When `before` is `True` the result points at the start of the match;
when it is `False` the result points just past the end of the match.

Starting from `row = 1` and `col = 1`:

  - If `offset = 0` and `before = True` we would get `(3, 1, 4)`
  - If `offset = 0` and `before = False` we would get `(5, 1, 6)`
  - If `offset = 7` there is no match, so we would get `(-1, row, col)`
    with the `row` and `col` that were passed in, unchanged
-}
findSubString : Bool -> String -> Int -> Int -> Int -> String -> (Int, Int, Int)
findSubString =
Native.ParserPrimitives.findSubString