Revert "Add some parsing stuff to finished/ for Tag"
This reverts commit 2bd0c78583.
This commit is contained in:
@@ -1,7 +1,6 @@
|
||||
{
|
||||
"rtfeldman/elm-validate": "3.0.0",
|
||||
"rtfeldman/selectlist": "1.0.0",
|
||||
"elm-tools/parser-primitives": "1.0.0",
|
||||
"elm-lang/navigation": "2.1.0",
|
||||
"elm-lang/virtual-dom": "2.0.4",
|
||||
"evancz/url-parser": "2.0.1",
|
||||
@@ -9,7 +8,6 @@
|
||||
"evancz/elm-markdown": "3.0.2",
|
||||
"elm-lang/dom": "1.1.1",
|
||||
"elm-lang/html": "2.0.0",
|
||||
"elm-tools/parser": "2.0.1",
|
||||
"elm-community/json-extra": "2.7.0",
|
||||
"elm-lang/http": "1.0.0",
|
||||
"lukewestby/elm-http-builder": "5.2.0",
|
||||
|
||||
@@ -1 +0,0 @@
|
||||
elm-stuff
|
||||
@@ -1,27 +0,0 @@
|
||||
Copyright (c) 2017-present, Evan Czaplicki
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the {organization} nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
@@ -1,7 +0,0 @@
|
||||
# Parser Primitives
|
||||
|
||||
**In 99.9999% of cases, you do not want this.**
|
||||
|
||||
When creating a parser combinator library like [`elm-tools/parser`](https://github.com/elm-tools/parser), you want lower-level access to strings to get better performance.
|
||||
|
||||
This package exposes these low-level functions so that `elm-tools/parser` does not have an unfair performance advantage.
|
||||
@@ -1,17 +0,0 @@
|
||||
{
|
||||
"version": "1.0.0",
|
||||
"summary": "Fast (but safe) primitives for creating parsing packages",
|
||||
"repository": "https://github.com/elm-tools/parser-primitives.git",
|
||||
"license": "BSD-3-Clause",
|
||||
"source-directories": [
|
||||
"src"
|
||||
],
|
||||
"exposed-modules": [
|
||||
"ParserPrimitives"
|
||||
],
|
||||
"dependencies": {
|
||||
"elm-lang/core": "5.0.0 <= v < 6.0.0"
|
||||
},
|
||||
"native-modules": true,
|
||||
"elm-version": "0.18.0 <= v < 0.19.0"
|
||||
}
|
||||
@@ -1,130 +0,0 @@
|
||||
var _elm_tools$parser_primitives$Native_ParserPrimitives = function() {
|
||||
|
||||
|
||||
// STRINGS
|
||||
|
||||
function isSubString(smallString, offset, row, col, bigString)
|
||||
{
|
||||
var smallLength = smallString.length;
|
||||
var bigLength = bigString.length - offset;
|
||||
|
||||
if (bigLength < smallLength)
|
||||
{
|
||||
return tuple3(-1, row, col);
|
||||
}
|
||||
|
||||
for (var i = 0; i < smallLength; i++)
|
||||
{
|
||||
var char = smallString[i];
|
||||
|
||||
if (char !== bigString[offset + i])
|
||||
{
|
||||
return tuple3(-1, row, col);
|
||||
}
|
||||
|
||||
// if it is a two word character
|
||||
if ((bigString.charCodeAt(offset) & 0xF800) === 0xD800)
|
||||
{
|
||||
i++
|
||||
if (smallString[i] !== bigString[offset + i])
|
||||
{
|
||||
return tuple3(-1, row, col);
|
||||
}
|
||||
col++;
|
||||
continue;
|
||||
}
|
||||
|
||||
// if it is a newline
|
||||
if (char === '\n')
|
||||
{
|
||||
row++;
|
||||
col = 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
// if it is a one word character
|
||||
col++
|
||||
}
|
||||
|
||||
return tuple3(offset + smallLength, row, col);
|
||||
}
|
||||
|
||||
function tuple3(a, b, c)
|
||||
{
|
||||
return { ctor: '_Tuple3', _0: a, _1: b, _2: c };
|
||||
}
|
||||
|
||||
|
||||
// CHARS
|
||||
|
||||
var mkChar = _elm_lang$core$Native_Utils.chr;
|
||||
|
||||
function isSubChar(predicate, offset, string)
|
||||
{
|
||||
if (offset >= string.length)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
||||
if ((string.charCodeAt(offset) & 0xF800) === 0xD800)
|
||||
{
|
||||
return predicate(mkChar(string.substr(offset, 2)))
|
||||
? offset + 2
|
||||
: -1;
|
||||
}
|
||||
|
||||
var char = string[offset];
|
||||
|
||||
return predicate(mkChar(char))
|
||||
? ((char === '\n') ? -2 : (offset + 1))
|
||||
: -1;
|
||||
}
|
||||
|
||||
|
||||
// FIND STRING
|
||||
|
||||
function findSubString(before, smallString, offset, row, col, bigString)
|
||||
{
|
||||
var newOffset = bigString.indexOf(smallString, offset);
|
||||
|
||||
if (newOffset === -1)
|
||||
{
|
||||
return tuple3(-1, row, col);
|
||||
}
|
||||
|
||||
var scanTarget = before ? newOffset : newOffset + smallString.length;
|
||||
|
||||
while (offset < scanTarget)
|
||||
{
|
||||
var char = bigString[offset];
|
||||
|
||||
if (char === '\n')
|
||||
{
|
||||
offset++;
|
||||
row++;
|
||||
col = 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
if ((bigString.charCodeAt(offset) & 0xF800) === 0xD800)
|
||||
{
|
||||
offset += 2;
|
||||
col++;
|
||||
continue;
|
||||
}
|
||||
|
||||
offset++;
|
||||
col++;
|
||||
}
|
||||
|
||||
return tuple3(offset, row, col);
|
||||
}
|
||||
|
||||
|
||||
return {
|
||||
isSubString: F5(isSubString),
|
||||
isSubChar: F3(isSubChar),
|
||||
findSubString: F6(findSubString)
|
||||
};
|
||||
|
||||
}();
|
||||
@@ -1,109 +0,0 @@
|
||||
module ParserPrimitives exposing
|
||||
( isSubString
|
||||
, isSubChar
|
||||
, findSubString
|
||||
)
|
||||
|
||||
{-| Low-level functions for creating parser combinator libraries.
|
||||
|
||||
@docs isSubString, isSubChar, findSubString
|
||||
-}
|
||||
|
||||
import Native.ParserPrimitives
|
||||
|
||||
|
||||
|
||||
-- STRINGS
|
||||
|
||||
|
||||
{-| When making a fast parser, you want to avoid allocation as much as
|
||||
possible. That means you never want to mess with the source string, only
|
||||
keep track of an offset into that string.
|
||||
|
||||
You use `isSubString` like this:
|
||||
|
||||
isSubString "let" offset row col "let x = 4 in x"
|
||||
--==> ( newOffset, newRow, newCol )
|
||||
|
||||
You are looking for `"let"` at a given `offset`. On failure, the
|
||||
`newOffset` is `-1`. On success, the `newOffset` is the new offset. With
|
||||
our `"let"` example, it would be `offset + 3`.
|
||||
|
||||
You also provide the current `row` and `col` which do not align with
|
||||
`offset` in a clean way. For example, when you see a `\n` you are at
|
||||
`row = row + 1` and `col = 1`. Furthermore, some UTF16 characters are
|
||||
two words wide, so even if there are no newlines, `offset` and `col`
|
||||
may not be equal.
|
||||
-}
|
||||
isSubString : String -> Int -> Int -> Int -> String -> (Int, Int, Int)
|
||||
isSubString =
|
||||
Native.ParserPrimitives.isSubString
|
||||
|
||||
|
||||
|
||||
-- CHARACTERS
|
||||
|
||||
|
||||
{-| Again, when parsing, you want to allocate as little as possible.
|
||||
So this function lets you say:
|
||||
|
||||
isSubChar isSpace offset "this is the source string"
|
||||
--==> newOffset
|
||||
|
||||
The `(Char -> Bool)` argument is called a predicate.
|
||||
The `newOffset` value can be a few different things:
|
||||
|
||||
- `-1` means that the predicate failed
|
||||
- `-2` means the predicate succeeded with a `\n`
|
||||
- otherwise you will get `offset + 1` or `offset + 2`
|
||||
depending on whether the UTF16 character is one or two
|
||||
words wide.
|
||||
|
||||
It is better to use union types in general, but it is worth the
|
||||
danger *within* parsing libraries to get the benefit *outside*.
|
||||
|
||||
So you can write a `chomp` function like this:
|
||||
|
||||
chomp : (Char -> Bool) -> Int -> Int -> Int -> String -> (Int, Int, Int)
|
||||
chomp isGood offset row col source =
|
||||
let
|
||||
newOffset =
|
||||
Prim.isSubChar isGood offset source
|
||||
in
|
||||
-- no match
|
||||
if newOffset == -1 then
|
||||
(offset, row, col)
|
||||
|
||||
-- newline match
|
||||
else if newOffset == -2 then
|
||||
chomp isGood (offset + 1) (row + 1) 1 source
|
||||
|
||||
-- normal match
|
||||
else
|
||||
chomp isGood newOffset row (col + 1) source
|
||||
|
||||
Notice that `chomp` can be tail-call optimized, so this turns into a
|
||||
`while` loop under the hood.
|
||||
-}
|
||||
isSubChar : (Char -> Bool) -> Int -> String -> Int
|
||||
isSubChar =
|
||||
Native.ParserPrimitives.isSubChar
|
||||
|
||||
|
||||
|
||||
-- INDEX
|
||||
|
||||
|
||||
{-| Find a substring after a given offset.
|
||||
|
||||
findSubString before "42" offset row col "Is 42 the answer?"
|
||||
--==> (newOffset, newRow, newCol)
|
||||
|
||||
If `offset = 0` and `before = True` we would get `(3, 1, 4)`
|
||||
If `offset = 0` and `before = False` we would get `(5, 1, 6)`
|
||||
|
||||
If `offset = 7` we would get `(-1, 1, 18)`
|
||||
-}
|
||||
findSubString : Bool -> String -> Int -> Int -> Int -> String -> (Int, Int, Int)
|
||||
findSubString =
|
||||
Native.ParserPrimitives.findSubString
|
||||
@@ -1 +0,0 @@
|
||||
elm-stuff
|
||||
@@ -1,27 +0,0 @@
|
||||
Copyright (c) 2017-present, Evan Czaplicki
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the {organization} nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
@@ -1,170 +0,0 @@
|
||||
# Parser + Nice Error Messages
|
||||
|
||||
Goals:
|
||||
|
||||
- Make writing parsers as simple and fun as possible.
|
||||
- Produce excellent error messages.
|
||||
- Go pretty fast.
|
||||
|
||||
This is achieved with a couple concepts that I have not seen in any other parser libraries: [parser pipelines](#parser-pipelines), [tracking context](#tracking-context), and [delayed commits](#delayed-commits).
|
||||
|
||||
|
||||
## Parser Pipelines
|
||||
|
||||
To parse a 2D point like `( 3, 4 )`, you might create a `point` parser like this:
|
||||
|
||||
```elm
|
||||
import Parser exposing (Parser, (|.), (|=), succeed, symbol, float, ignore, zeroOrMore)
|
||||
|
||||
|
||||
type alias Point =
|
||||
{ x : Float
|
||||
, y : Float
|
||||
}
|
||||
|
||||
|
||||
point : Parser Point
|
||||
point =
|
||||
succeed Point
|
||||
|. symbol "("
|
||||
|. spaces
|
||||
|= float
|
||||
|. spaces
|
||||
|. symbol ","
|
||||
|. spaces
|
||||
|= float
|
||||
|. spaces
|
||||
|. symbol ")"
|
||||
|
||||
|
||||
spaces : Parser ()
|
||||
spaces =
|
||||
ignore zeroOrMore (\c -> c == ' ')
|
||||
```
|
||||
|
||||
All the interesting stuff is happening in `point`. It uses two operators:
|
||||
|
||||
- [`(|.)`][ignore] means “parse this, but **ignore** the result”
|
||||
- [`(|=)`][keep] means “parse this, and **keep** the result”
|
||||
|
||||
So the `Point` function only gets the result of the two `float` parsers.
|
||||
|
||||
[ignore]: http://package.elm-lang.org/packages/elm-tools/parser/latest/Parser#|.
|
||||
[keep]: http://package.elm-lang.org/packages/elm-tools/parser/latest/Parser#|=
|
||||
|
||||
The theory is that `|=` introduces more “visual noise” than `|.`, making it pretty easy to pick out which lines in the pipeline are important.
|
||||
|
||||
I recommend having one line per operator in your parser pipeline. If you need multiple lines for some reason, use a `let` or make a helper function.
|
||||
|
||||
|
||||
## Tracking Context
|
||||
|
||||
Most parsers tell you the row and column of the problem:
|
||||
|
||||
Something went wrong at (4:17)
|
||||
|
||||
That may be true, but it is not how humans think. It is how text editors think! It would be better to say:
|
||||
|
||||
I found a problem with this list:
|
||||
|
||||
[ 1, 23zm5, 3 ]
|
||||
^
|
||||
I wanted an integer, like 6 or 90219.
|
||||
|
||||
Notice that the error messages says `this list`. That is context! That is the language my brain speaks, not rows and columns.
|
||||
|
||||
This parser package lets you annotate context with the [`inContext`][inContext] function. You can let the parser know “I am trying to parse a `"list"` right now” so if an error happens anywhere in that context, you get the hand annotation!
|
||||
|
||||
[inContext]: http://package.elm-lang.org/packages/elm-tools/parser/latest/Parser#inContext
|
||||
|
||||
> **Note:** This technique is used by the parser in the Elm compiler to give more helpful error messages.
|
||||
|
||||
|
||||
## Delayed Commits
|
||||
|
||||
To make fast parsers with precise error messages, this package lets you control when a parser **commits** to a certain path.
|
||||
|
||||
For example, you are trying to parse the following list:
|
||||
|
||||
```elm
|
||||
[ 1, 23zm5, 3 ]
|
||||
```
|
||||
|
||||
Ideally, you want the error at the `z`, but the libraries I have seen make this difficult to achieve efficiently. You often end up with an error at `[` because “something went wrong”.
|
||||
|
||||
**This package introduces [`delayedCommit`][delayedCommit] to resolve this.**
|
||||
|
||||
Say we want to create `intList`, a parser for comma separated lists of integers like `[1, 2, 3]`. We would say something like this:
|
||||
|
||||
[delayedCommit]: http://package.elm-lang.org/packages/elm-tools/parser/latest/Parser#delayedCommit
|
||||
|
||||
```elm
|
||||
import Parser exposing (..)
|
||||
|
||||
|
||||
{-| We start by ignoring the opening square brace and some spaces.
|
||||
We only really care about the numbers, so we parse an `int` and
|
||||
then use `intListHelp` to start chomping other list entries.
|
||||
-}
|
||||
intList : Parser (List Int)
|
||||
intList =
|
||||
succeed identity
|
||||
|. symbol "["
|
||||
|. spaces
|
||||
|= andThen (\n -> intListHelp [n]) int
|
||||
|. spaces
|
||||
|. symbol "]"
|
||||
|
||||
|
||||
{-| `intListHelp` checks if there is a `nextInt`. If so, it
|
||||
continues trying to find more list items. If not, it gives
|
||||
back the list of integers we have accumulated so far.
|
||||
-}
|
||||
intListHelp : List Int -> Parser (List Int)
|
||||
intListHelp revInts =
|
||||
oneOf
|
||||
[ nextInt
|
||||
|> andThen (\n -> intListHelp (n :: revInts))
|
||||
, succeed (List.reverse revInts)
|
||||
]
|
||||
```
|
||||
|
||||
Now we get to the tricky part! How do we define `nextInt`? Here are two approaches, but only the second one actually works!
|
||||
|
||||
|
||||
```elm
|
||||
-- BAD
|
||||
badNextInt : Parser Int
|
||||
badNextInt =
|
||||
succeed identity
|
||||
|. spaces
|
||||
|. symbol ","
|
||||
|. spaces
|
||||
|= int
|
||||
|
||||
-- GOOD
|
||||
nextInt : Parser Int
|
||||
nextInt =
|
||||
delayedCommit spaces <|
|
||||
succeed identity
|
||||
|. symbol ","
|
||||
|. spaces
|
||||
|= int
|
||||
```
|
||||
|
||||
The `badNextInt` looks pretty normal, but it will not work. It commits as soon as the first `spaces` parser succeeds. It fails in the following situation:
|
||||
|
||||
```elm
|
||||
[ 1, 2, 3 ]
|
||||
^
|
||||
```
|
||||
|
||||
When we get to the closing `]` we have already successfully parsed some spaces. That means we are commited to `badNextInt` and need a comma. That fails, so the whole parse fails!
|
||||
|
||||
With `nextInt`, the [`delayedCommit`][delayedCommit] function is saying to parse `spaces` but only commit if progress is made *after* that. So we are only commited to this parser if we see a comma.
|
||||
|
||||
<br>
|
||||
|
||||
<br>
|
||||
|
||||
## [Comparison with Prior Work](https://github.com/elm-tools/parser/blob/master/comparison.md)
|
||||
@@ -1,69 +0,0 @@
|
||||
## Comparison with Prior Work
|
||||
|
||||
I have not seen the [parser pipeline][1] or the [context stack][2] ideas in other libraries, but [delayed commits][3] relate to prior work.
|
||||
|
||||
[1]: README.md#parser-pipelines
|
||||
[2]: README.md#tracking-context
|
||||
[3]: README.md#delayed-commits
|
||||
|
||||
Most parser combinator libraries I have seen are based on Haskell’s Parsec library, which has primitives named `try` and `lookAhead`. I believe [`delayedCommitMap`][delayedCommitMap] is a better primitive for two reasons.
|
||||
|
||||
[delayedCommitMap]: http://package.elm-lang.org/packages/elm-tools/parser/latest/Parser#delayedCommitMap
|
||||
|
||||
|
||||
### Performance and Composition
|
||||
|
||||
Say we want to create a precise error message for `length [1,,3]`. The naive approach with Haskell’s Parsec library produces very bad error messages:
|
||||
|
||||
```haskell
|
||||
spaceThenArg :: Parser Expr
|
||||
spaceThenArg =
|
||||
try (spaces >> term)
|
||||
```
|
||||
|
||||
This means we get a precise error from `term`, but then throw it away and say something went wrong at the space before the `[`. Very confusing! To improve quality, we must write something like this:
|
||||
|
||||
```haskell
|
||||
spaceThenArg :: Parser Expr
|
||||
spaceThenArg =
|
||||
choice
|
||||
[ do lookAhead (spaces >> char '[')
|
||||
spaces
|
||||
term
|
||||
, try (spaces >> term)
|
||||
]
|
||||
```
|
||||
|
||||
Notice that we parse `spaces` twice no matter what.
|
||||
|
||||
Notice that we also had to hardcode `[` in the `lookAhead`. What if we update `term` to parse records that start with `{` as well? To get good commits on records, we must remember to update `lookAhead` to look for `oneOf "[{"`. Implementation details are leaking out of `term`!
|
||||
|
||||
With `delayedCommit` in this Elm library, you can just say:
|
||||
|
||||
```elm
|
||||
spaceThenArg : Parser Expr
|
||||
spaceThenArg =
|
||||
delayedCommit spaces term
|
||||
```
|
||||
|
||||
It does less work, and is more reliable as `term` evolves. I believe `delayedCommit` makes `lookAhead` pointless.
|
||||
|
||||
|
||||
### Expressiveness
|
||||
|
||||
You can define `try` in terms of [`delayedCommitMap`][delayedCommitMap] like this:
|
||||
|
||||
```elm
|
||||
try : Parser a -> Parser a
|
||||
try parser =
|
||||
delayedCommitMap always parser (succeed ())
|
||||
```
|
||||
|
||||
No expressiveness is lost!
|
||||
|
||||
While it is possible to define `try`, I left it out of this package. In practice, `try` often leads to “bad commits” where your parser fails in a very specific way, but you then backtrack to a less specific error message. I considered naming it `allOrNothing` to better explain how it changes commit behavior, but ultimately, I thought it was best to encourage users to express their parsers with `delayedCommit` directly.
|
||||
|
||||
|
||||
### Summary
|
||||
|
||||
Compared to previous work, `delayedCommit` lets you produce precise error messages **more efficiently**. By thinking about “commit behavior” directly, you also end up with **cleaner composition** of parsers. And these benefits come **without any loss of expressiveness**.
|
||||
@@ -1,19 +0,0 @@
|
||||
{
|
||||
"version": "2.0.1",
|
||||
"summary": "a parsing library, focused on simplicity and great error messages",
|
||||
"repository": "https://github.com/elm-tools/parser.git",
|
||||
"license": "BSD-3-Clause",
|
||||
"source-directories": [
|
||||
"src"
|
||||
],
|
||||
"exposed-modules": [
|
||||
"Parser",
|
||||
"Parser.LanguageKit",
|
||||
"Parser.LowLevel"
|
||||
],
|
||||
"dependencies": {
|
||||
"elm-lang/core": "5.1.0 <= v < 6.0.0",
|
||||
"elm-tools/parser-primitives": "1.0.0 <= v < 2.0.0"
|
||||
},
|
||||
"elm-version": "0.18.0 <= v < 0.19.0"
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,149 +0,0 @@
|
||||
module Parser.Internal exposing
|
||||
( Parser(..)
|
||||
, Step(..)
|
||||
, State
|
||||
, chomp
|
||||
, chompDigits
|
||||
, chompDotAndExp
|
||||
, isBadIntEnd
|
||||
)
|
||||
|
||||
|
||||
import Char
|
||||
import ParserPrimitives as Prim
|
||||
|
||||
|
||||
|
||||
-- PARSERS
|
||||
|
||||
|
||||
type Parser ctx x a =
|
||||
Parser (State ctx -> Step ctx x a)
|
||||
|
||||
|
||||
type Step ctx x a
|
||||
= Good a (State ctx)
|
||||
| Bad x (State ctx)
|
||||
|
||||
|
||||
type alias State ctx =
|
||||
{ source : String
|
||||
, offset : Int
|
||||
, indent : Int
|
||||
, context : List ctx
|
||||
, row : Int
|
||||
, col : Int
|
||||
}
|
||||
|
||||
|
||||
|
||||
-- CHOMPERS
|
||||
|
||||
|
||||
chomp : (Char -> Bool) -> Int -> String -> Int
|
||||
chomp isGood offset source =
|
||||
let
|
||||
newOffset =
|
||||
Prim.isSubChar isGood offset source
|
||||
in
|
||||
if newOffset < 0 then
|
||||
offset
|
||||
|
||||
else
|
||||
chomp isGood newOffset source
|
||||
|
||||
|
||||
|
||||
-- CHOMP DIGITS
|
||||
|
||||
|
||||
chompDigits : (Char -> Bool) -> Int -> String -> Result Int Int
|
||||
chompDigits isValidDigit offset source =
|
||||
let
|
||||
newOffset =
|
||||
chomp isValidDigit offset source
|
||||
in
|
||||
-- no digits
|
||||
if newOffset == offset then
|
||||
Err newOffset
|
||||
|
||||
-- ends with non-digit characters
|
||||
else if Prim.isSubChar isBadIntEnd newOffset source /= -1 then
|
||||
Err newOffset
|
||||
|
||||
-- all valid digits!
|
||||
else
|
||||
Ok newOffset
|
||||
|
||||
|
||||
isBadIntEnd : Char -> Bool
|
||||
isBadIntEnd char =
|
||||
Char.isDigit char
|
||||
|| Char.isUpper char
|
||||
|| Char.isLower char
|
||||
|| char == '.'
|
||||
|
||||
|
||||
|
||||
-- CHOMP FLOAT STUFF
|
||||
|
||||
|
||||
chompDotAndExp : Int -> String -> Result Int Int
|
||||
chompDotAndExp offset source =
|
||||
let
|
||||
dotOffset =
|
||||
Prim.isSubChar isDot offset source
|
||||
in
|
||||
if dotOffset == -1 then
|
||||
chompExp offset source
|
||||
|
||||
else
|
||||
chompExp (chomp Char.isDigit dotOffset source) source
|
||||
|
||||
|
||||
isDot : Char -> Bool
|
||||
isDot char =
|
||||
char == '.'
|
||||
|
||||
|
||||
chompExp : Int -> String -> Result Int Int
|
||||
chompExp offset source =
|
||||
let
|
||||
eOffset =
|
||||
Prim.isSubChar isE offset source
|
||||
in
|
||||
if eOffset == -1 then
|
||||
Ok offset
|
||||
|
||||
else
|
||||
let
|
||||
opOffset =
|
||||
Prim.isSubChar isPlusOrMinus eOffset source
|
||||
|
||||
expOffset =
|
||||
if opOffset == -1 then eOffset else opOffset
|
||||
in
|
||||
if Prim.isSubChar isZero expOffset source /= -1 then
|
||||
Err expOffset
|
||||
|
||||
else if Prim.isSubChar Char.isDigit expOffset source == -1 then
|
||||
Err expOffset
|
||||
|
||||
else
|
||||
chompDigits Char.isDigit expOffset source
|
||||
|
||||
|
||||
isE : Char -> Bool
|
||||
isE char =
|
||||
char == 'e' || char == 'E'
|
||||
|
||||
|
||||
isZero : Char -> Bool
|
||||
isZero char =
|
||||
char == '0'
|
||||
|
||||
|
||||
isPlusOrMinus : Char -> Bool
|
||||
isPlusOrMinus char =
|
||||
char == '+' || char == '-'
|
||||
|
||||
@@ -1,509 +0,0 @@
|
||||
module Parser.LanguageKit exposing
|
||||
( variable
|
||||
, list, record, tuple, sequence, Trailing(..)
|
||||
, whitespace, LineComment(..), MultiComment(..)
|
||||
)
|
||||
|
||||
|
||||
{-|
|
||||
|
||||
# Variables
|
||||
@docs variable
|
||||
|
||||
# Lists, records, and that sort of thing
|
||||
@docs list, record, tuple, sequence, Trailing
|
||||
|
||||
# Whitespace
|
||||
@docs whitespace, LineComment, MultiComment
|
||||
|
||||
-}
|
||||
|
||||
|
||||
import Set exposing (Set)
|
||||
import Parser exposing (..)
|
||||
import Parser.Internal as I exposing (Step(..), State)
|
||||
import ParserPrimitives as Prim
|
||||
|
||||
|
||||
|
||||
-- VARIABLES
|
||||
|
||||
|
||||
{-| Create a parser for variables. It takes two `Char` checkers. The
|
||||
first one is for the first character. The second one is for all the
|
||||
other characters.
|
||||
|
||||
In Elm, we distinguish between upper and lower case variables, so we
|
||||
can do something like this:
|
||||
|
||||
import Char
|
||||
import Parser exposing (..)
|
||||
import Parser.LanguageKit exposing (variable)
|
||||
import Set
|
||||
|
||||
lowVar : Parser String
|
||||
lowVar =
|
||||
variable Char.isLower isVarChar keywords
|
||||
|
||||
capVar : Parser String
|
||||
capVar =
|
||||
variable Char.isUpper isVarChar keywords
|
||||
|
||||
isVarChar : Char -> Bool
|
||||
isVarChar char =
|
||||
Char.isLower char
|
||||
|| Char.isUpper char
|
||||
|| Char.isDigit char
|
||||
|| char == '_'
|
||||
|
||||
keywords : Set.Set String
|
||||
keywords =
|
||||
Set.fromList [ "let", "in", "case", "of" ]
|
||||
-}
|
||||
variable : (Char -> Bool) -> (Char -> Bool) -> Set String -> Parser String
|
||||
variable isFirst isOther keywords =
|
||||
I.Parser <| \({ source, offset, indent, context, row, col } as state1) ->
|
||||
let
|
||||
firstOffset =
|
||||
Prim.isSubChar isFirst offset source
|
||||
in
|
||||
if firstOffset == -1 then
|
||||
Bad ExpectingVariable state1
|
||||
|
||||
else
|
||||
let
|
||||
state2 =
|
||||
if firstOffset == -2 then
|
||||
varHelp isOther (offset + 1) (row + 1) 1 source indent context
|
||||
else
|
||||
varHelp isOther firstOffset row (col + 1) source indent context
|
||||
|
||||
name =
|
||||
String.slice offset state2.offset source
|
||||
in
|
||||
if Set.member name keywords then
|
||||
Bad ExpectingVariable state1
|
||||
|
||||
else
|
||||
Good name state2
|
||||
|
||||
|
||||
varHelp : (Char -> Bool) -> Int -> Int -> Int -> String -> Int -> List ctx -> State ctx
|
||||
varHelp isGood offset row col source indent context =
|
||||
let
|
||||
newOffset =
|
||||
Prim.isSubChar isGood offset source
|
||||
in
|
||||
if newOffset == -1 then
|
||||
{ source = source
|
||||
, offset = offset
|
||||
, indent = indent
|
||||
, context = context
|
||||
, row = row
|
||||
, col = col
|
||||
}
|
||||
|
||||
else if newOffset == -2 then
|
||||
varHelp isGood (offset + 1) (row + 1) 1 source indent context
|
||||
|
||||
else
|
||||
varHelp isGood newOffset row (col + 1) source indent context
|
||||
|
||||
|
||||
|
||||
-- SEQUENCES
|
||||
|
||||
|
||||
{-| Parse a comma-separated list like `[ 1, 2, 3 ]`. You provide
|
||||
a parser for the spaces and for the list items. So if you want
|
||||
to parse a list of integers, you would say:
|
||||
|
||||
import Parser exposing (Parser)
|
||||
import Parser.LanguageKit as Parser
|
||||
|
||||
intList : Parser (List Int)
|
||||
intList =
|
||||
Parser.list spaces Parser.int
|
||||
|
||||
spaces : Parser ()
|
||||
spaces =
|
||||
Parser.ignore zeroOrMore (\char -> char == ' ')
|
||||
|
||||
-- run intList "[]" == Ok []
|
||||
-- run intList "[ ]" == Ok []
|
||||
-- run intList "[1,2,3]" == Ok [1,2,3]
|
||||
-- run intList "[ 1, 2, 3 ]" == Ok [1,2,3]
|
||||
-- run intList "[ 1 , 2 , 3 ]" == Ok [1,2,3]
|
||||
-- run intList "[ 1, 2, 3, ]" == Err ...
|
||||
-- run intList "[, 1, 2, 3 ]" == Err ...
|
||||
|
||||
**Note:** If you want trailing commas, check out the
|
||||
[`sequence`](#sequence) function.
|
||||
-}
|
||||
list : Parser () -> Parser a -> Parser (List a)
|
||||
list spaces item =
|
||||
sequence
|
||||
{ start = "["
|
||||
, separator = ","
|
||||
, end = "]"
|
||||
, spaces = spaces
|
||||
, item = item
|
||||
, trailing = Forbidden
|
||||
}
|
||||
|
||||
|
||||
{-| Help parse records like `{ a = 2, b = 2 }`. You provide
|
||||
a parser for the spaces and for the list items, you might say:
|
||||
|
||||
import Parser exposing ( Parser, (|.), (|=), zeroOrMore )
|
||||
import Parser.LanguageKit as Parser
|
||||
|
||||
record : Parser (List (String, Int))
|
||||
record =
|
||||
Parser.record spaces field
|
||||
|
||||
field : Parser (String, Int)
|
||||
field =
|
||||
Parser.succeed (,)
|
||||
|= lowVar
|
||||
|. spaces
|
||||
|. Parser.symbol "="
|
||||
|. spaces
|
||||
|= int
|
||||
|
||||
spaces : Parser ()
|
||||
spaces =
|
||||
Parser.ignore zeroOrMore (\char -> char == ' ')
|
||||
|
||||
-- run record "{}" == Ok []
|
||||
-- run record "{ }" == Ok []
|
||||
-- run record "{ x = 3 }" == Ok [ ("x",3) ]
|
||||
-- run record "{ x = 3, }" == Err ...
|
||||
-- run record "{ x = 3, y = 4 }" == Ok [ ("x",3), ("y",4) ]
|
||||
-- run record "{ x = 3, y = }" == Err ...
|
||||
|
||||
**Note:** If you want trailing commas, check out the
|
||||
[`sequence`](#sequence) function.
|
||||
-}
|
||||
record : Parser () -> Parser a -> Parser (List a)
|
||||
record spaces item =
|
||||
sequence
|
||||
{ start = "{"
|
||||
, separator = ","
|
||||
, end = "}"
|
||||
, spaces = spaces
|
||||
, item = item
|
||||
, trailing = Forbidden
|
||||
}
|
||||
|
||||
|
||||
{-| Help parse tuples like `(3, 4)`. Works just like [`list`](#list)
|
||||
and [`record`](#record). And if you need something custom, check out
|
||||
the [`sequence`](#sequence) function.
|
||||
-}
|
||||
tuple : Parser () -> Parser a -> Parser (List a)
|
||||
tuple spaces item =
|
||||
sequence
|
||||
{ start = "("
|
||||
, separator = ","
|
||||
, end = ")"
|
||||
, spaces = spaces
|
||||
, item = item
|
||||
, trailing = Forbidden
|
||||
}
|
||||
|
||||
|
||||
{-| Handle things *like* lists and records, but you can customize the
|
||||
details however you need. Say you want to parse C-style code blocks:
|
||||
|
||||
import Parser exposing (Parser)
|
||||
import Parser.LanguageKit as Parser exposing (Trailing(..))
|
||||
|
||||
block : Parser (List Stmt)
|
||||
block =
|
||||
Parser.sequence
|
||||
{ start = "{"
|
||||
, separator = ";"
|
||||
, end = "}"
|
||||
, spaces = spaces
|
||||
, item = statement
|
||||
, trailing = Mandatory -- demand a trailing semi-colon
|
||||
}
|
||||
|
||||
-- spaces : Parser ()
|
||||
-- statement : Parser Stmt
|
||||
|
||||
**Note:** If you need something more custom, do not be afraid to check
|
||||
out the implementation and customize it for your case. It is better to
|
||||
get nice error messages with a lower-level implementation than to try
|
||||
to hack high-level parsers to do things they are not made for.
|
||||
-}
|
||||
sequence
|
||||
: { start : String
|
||||
, separator : String
|
||||
, end : String
|
||||
, spaces : Parser ()
|
||||
, item : Parser a
|
||||
, trailing : Trailing
|
||||
}
|
||||
-> Parser (List a)
|
||||
sequence { start, end, spaces, item, separator, trailing } =
|
||||
symbol start
|
||||
|- spaces
|
||||
|- sequenceEnd end spaces item separator trailing
|
||||
|
||||
|
||||
{-| What’s the deal with trailing commas? Are they `Forbidden`?
|
||||
Are they `Optional`? Are they `Mandatory`? Welcome to [shapes
|
||||
club](http://poorlydrawnlines.com/comic/shapes-club/)!
|
||||
-}
|
||||
type Trailing = Forbidden | Optional | Mandatory
|
||||
|
||||
|
||||
ignore : Parser ignore -> Parser keep -> Parser keep
|
||||
ignore ignoreParser keepParser =
|
||||
map2 revAlways ignoreParser keepParser
|
||||
|
||||
|
||||
(|-) : Parser ignore -> Parser keep -> Parser keep
|
||||
(|-) =
|
||||
ignore
|
||||
|
||||
|
||||
revAlways : ignore -> keep -> keep
|
||||
revAlways _ keep =
|
||||
keep
|
||||
|
||||
|
||||
sequenceEnd : String -> Parser () -> Parser a -> String -> Trailing -> Parser (List a)
|
||||
sequenceEnd end spaces parseItem sep trailing =
|
||||
let
|
||||
chompRest item =
|
||||
case trailing of
|
||||
Forbidden ->
|
||||
sequenceEndForbidden end spaces parseItem sep [item]
|
||||
|
||||
Optional ->
|
||||
sequenceEndOptional end spaces parseItem sep [item]
|
||||
|
||||
Mandatory ->
|
||||
spaces
|
||||
|- symbol sep
|
||||
|- spaces
|
||||
|- sequenceEndMandatory end spaces parseItem sep [item]
|
||||
in
|
||||
oneOf
|
||||
[ parseItem
|
||||
|> andThen chompRest
|
||||
, symbol end
|
||||
|- succeed []
|
||||
]
|
||||
|
||||
|
||||
sequenceEndForbidden : String -> Parser () -> Parser a -> String -> List a -> Parser (List a)
|
||||
sequenceEndForbidden end spaces parseItem sep revItems =
|
||||
let
|
||||
chompRest item =
|
||||
sequenceEndForbidden end spaces parseItem sep (item :: revItems)
|
||||
in
|
||||
ignore spaces <|
|
||||
oneOf
|
||||
[ symbol sep
|
||||
|- spaces
|
||||
|- andThen chompRest parseItem
|
||||
, symbol end
|
||||
|- succeed (List.reverse revItems)
|
||||
]
|
||||
|
||||
|
||||
sequenceEndOptional : String -> Parser () -> Parser a -> String -> List a -> Parser (List a)
|
||||
sequenceEndOptional end spaces parseItem sep revItems =
|
||||
let
|
||||
parseEnd =
|
||||
andThen (\_ -> succeed (List.reverse revItems)) (symbol end)
|
||||
|
||||
chompRest item =
|
||||
sequenceEndOptional end spaces parseItem sep (item :: revItems)
|
||||
in
|
||||
ignore spaces <|
|
||||
oneOf
|
||||
[ symbol sep
|
||||
|- spaces
|
||||
|- oneOf [ andThen chompRest parseItem, parseEnd ]
|
||||
, parseEnd
|
||||
]
|
||||
|
||||
|
||||
sequenceEndMandatory : String -> Parser () -> Parser a -> String -> List a -> Parser (List a)
|
||||
sequenceEndMandatory end spaces parseItem sep revItems =
|
||||
let
|
||||
chompRest item =
|
||||
sequenceEndMandatory end spaces parseItem sep (item :: revItems)
|
||||
in
|
||||
oneOf
|
||||
[ andThen chompRest <|
|
||||
parseItem
|
||||
|. spaces
|
||||
|. symbol sep
|
||||
|. spaces
|
||||
, symbol end
|
||||
|- succeed (List.reverse revItems)
|
||||
]
|
||||
|
||||
|
||||
|
||||
-- WHITESPACE
|
||||
|
||||
|
||||
{-| Create a custom whitespace parser. It will always chomp the
|
||||
`' '`, `'\r'`, and `'\n'` characters, but you can customize some
|
||||
other things. Here are some examples:
|
||||
|
||||
elm : Parser ()
|
||||
elm =
|
||||
whitespace
|
||||
{ allowTabs = False
|
||||
, lineComment = LineComment "--"
|
||||
, multiComment = NestableComment "{-" "-}"
|
||||
}
|
||||
|
||||
js : Parser ()
|
||||
js =
|
||||
whitespace
|
||||
{ allowTabs = True
|
||||
, lineComment = LineComment "//"
|
||||
, multiComment = UnnestableComment "/*" "*/"
|
||||
}
|
||||
|
||||
If you need further customization, please open an issue describing your
|
||||
scenario or check out the source code and write it yourself. This is all
|
||||
built using stuff from the root `Parser` module.
|
||||
-}
|
||||
whitespace
|
||||
: { allowTabs : Bool
|
||||
, lineComment : LineComment
|
||||
, multiComment : MultiComment
|
||||
}
|
||||
-> Parser ()
|
||||
whitespace { allowTabs, lineComment, multiComment } =
|
||||
let
|
||||
tabParser =
|
||||
if allowTabs then
|
||||
[ Parser.ignore zeroOrMore isTab ]
|
||||
else
|
||||
[]
|
||||
|
||||
lineParser =
|
||||
case lineComment of
|
||||
NoLineComment ->
|
||||
[]
|
||||
|
||||
LineComment start ->
|
||||
[ symbol start
|
||||
|. ignoreUntil "\n"
|
||||
]
|
||||
|
||||
multiParser =
|
||||
case multiComment of
|
||||
NoMultiComment ->
|
||||
[]
|
||||
|
||||
UnnestableComment start end ->
|
||||
[ symbol start
|
||||
|. ignoreUntil end
|
||||
]
|
||||
|
||||
NestableComment start end ->
|
||||
[ nestableComment start end
|
||||
]
|
||||
in
|
||||
whitespaceHelp <|
|
||||
oneOf (tabParser ++ lineParser ++ multiParser)
|
||||
|
||||
|
||||
chompSpaces : Parser ()
|
||||
chompSpaces =
|
||||
Parser.ignore zeroOrMore isSpace
|
||||
|
||||
|
||||
isSpace : Char -> Bool
|
||||
isSpace char =
|
||||
char == ' ' || char == '\n' || char == '\r'
|
||||
|
||||
|
||||
isTab : Char -> Bool
|
||||
isTab char =
|
||||
char == '\t'
|
||||
|
||||
|
||||
whitespaceHelp : Parser a -> Parser ()
|
||||
whitespaceHelp parser =
|
||||
ignore chompSpaces <|
|
||||
oneOf [ andThen (\_ -> whitespaceHelp parser) parser, succeed () ]
|
||||
|
||||
|
||||
{-| Are line comments allowed? If so, what symbol do they start with?
|
||||
|
||||
LineComment "--" -- Elm
|
||||
LineComment "//" -- JS
|
||||
LineComment "#" -- Python
|
||||
NoLineComment -- OCaml
|
||||
-}
|
||||
type LineComment = NoLineComment | LineComment String
|
||||
|
||||
|
||||
{-| Are multi-line comments allowed? If so, what symbols do they start
|
||||
and end with?
|
||||
|
||||
NestableComment "{-" "-}" -- Elm
|
||||
UnnestableComment "/*" "*/" -- JS
|
||||
NoMultiComment -- Python
|
||||
|
||||
In Elm, you can nest multi-line comments. In C-like languages, like JS,
|
||||
this is not allowed. As soon as you see a `*/` the comment is over no
|
||||
matter what.
|
||||
-}
|
||||
type MultiComment
|
||||
= NoMultiComment
|
||||
| NestableComment String String
|
||||
| UnnestableComment String String
|
||||
|
||||
|
||||
nestableComment : String -> String -> Parser ()
|
||||
nestableComment start end =
|
||||
case (String.uncons start, String.uncons end) of
|
||||
(Nothing, _) ->
|
||||
fail "Trying to parse a multi-line comment, but the start token cannot be the empty string!"
|
||||
|
||||
(_, Nothing) ->
|
||||
fail "Trying to parse a multi-line comment, but the end token cannot be the empty string!"
|
||||
|
||||
( Just (startChar, _), Just (endChar, _) ) ->
|
||||
let
|
||||
isNotRelevant char =
|
||||
char /= startChar && char /= endChar
|
||||
in
|
||||
symbol start
|
||||
|. nestableCommentHelp isNotRelevant start end 1
|
||||
|
||||
|
||||
nestableCommentHelp : (Char -> Bool) -> String -> String -> Int -> Parser ()
|
||||
nestableCommentHelp isNotRelevant start end nestLevel =
|
||||
lazy <| \_ ->
|
||||
ignore (Parser.ignore zeroOrMore isNotRelevant) <|
|
||||
oneOf
|
||||
[ ignore (symbol end) <|
|
||||
if nestLevel == 1 then
|
||||
succeed ()
|
||||
else
|
||||
nestableCommentHelp isNotRelevant start end (nestLevel - 1)
|
||||
, ignore (symbol start) <|
|
||||
nestableCommentHelp isNotRelevant start end (nestLevel + 1)
|
||||
, ignore (Parser.ignore (Exactly 1) isChar) <|
|
||||
nestableCommentHelp isNotRelevant start end nestLevel
|
||||
]
|
||||
|
||||
|
||||
isChar : Char -> Bool
|
||||
isChar char =
|
||||
True
|
||||
@@ -1,118 +0,0 @@
|
||||
module Parser.LowLevel exposing
|
||||
( getIndentLevel
|
||||
, withIndentLevel
|
||||
|
||||
, getPosition
|
||||
, getRow
|
||||
, getCol
|
||||
|
||||
, getOffset
|
||||
, getSource
|
||||
)
|
||||
|
||||
{-| You are unlikely to need any of this under normal circumstances.
|
||||
|
||||
# Indentation
|
||||
@docs getIndentLevel, withIndentLevel
|
||||
|
||||
# Row, Column, Offset, and Source
|
||||
@docs getPosition, getRow, getCol, getOffset, getSource
|
||||
|
||||
-}
|
||||
|
||||
import Parser exposing (Parser)
|
||||
import Parser.Internal as I exposing (State)
|
||||
|
||||
|
||||
|
||||
-- INDENTATION
|
||||
|
||||
|
||||
{-| This parser tracks “indentation level” so you can parse indentation
|
||||
sensitive languages. Indentation levels correspond to column numbers, so
|
||||
it starts at 1.
|
||||
-}
|
||||
getIndentLevel : Parser Int
|
||||
getIndentLevel =
|
||||
I.Parser <| \state -> I.Good state.indent state
|
||||
|
||||
|
||||
{-| Run a parser with a given indentation level. So you will likely
|
||||
use `getCol` to get the current column, `andThen` give that to
|
||||
`withIndentLevel`.
|
||||
-}
|
||||
withIndentLevel : Int -> Parser a -> Parser a
|
||||
withIndentLevel newIndent (I.Parser parse) =
|
||||
I.Parser <| \state1 ->
|
||||
case parse (changeIndent newIndent state1) of
|
||||
I.Good a state2 ->
|
||||
I.Good a (changeIndent state1.indent state2)
|
||||
|
||||
I.Bad x state2 ->
|
||||
I.Bad x (changeIndent state1.indent state2)
|
||||
|
||||
|
||||
changeIndent : Int -> State ctx -> State ctx
|
||||
changeIndent newIndent { source, offset, context, row, col } =
|
||||
{ source = source
|
||||
, offset = offset
|
||||
, indent = newIndent
|
||||
, context = context
|
||||
, row = row
|
||||
, col = col
|
||||
}
|
||||
|
||||
|
||||
|
||||
-- POSITION
|
||||
|
||||
|
||||
{-| Code editors treat code like a grid. There are rows and columns.
|
||||
In most editors, rows and colums are 1-indexed. You move to a new row
|
||||
whenever you see a `\n` character.
|
||||
|
||||
The `getPosition` parser succeeds with your current row and column
|
||||
within the string you are parsing.
|
||||
-}
|
||||
getPosition : Parser (Int, Int)
|
||||
getPosition =
|
||||
I.Parser <| \state -> I.Good (state.row, state.col) state
|
||||
|
||||
|
||||
{-| The `getRow` parser succeeds with your current row within
|
||||
the string you are parsing.
|
||||
-}
|
||||
getRow : Parser Int
|
||||
getRow =
|
||||
I.Parser <| \state -> I.Good state.row state
|
||||
|
||||
|
||||
{-| The `getCol` parser succeeds with your current column within
|
||||
the string you are parsing.
|
||||
-}
|
||||
getCol : Parser Int
|
||||
getCol =
|
||||
I.Parser <| \state -> I.Good state.col state
|
||||
|
||||
|
||||
{-| Editors think of code as a grid, but behind the scenes it is just
|
||||
a flat array of UTF16 characters. `getOffset` tells you your index in
|
||||
that flat array. So if you have read `"\n\n\n\n"` you are on row 5,
|
||||
column 1, and offset 4.
|
||||
|
||||
**Note:** browsers use UTF16 strings, so characters may be one or two 16-bit
|
||||
words. This means you can read 4 characters, but your offset will move by 8.
|
||||
-}
|
||||
getOffset : Parser Int
|
||||
getOffset =
|
||||
I.Parser <| \state -> I.Good state.offset state
|
||||
|
||||
|
||||
{-| Get the entire string you are parsing right now. Paired with
|
||||
`getOffset` this can let you use `String.slice` to grab substrings
|
||||
with very little intermediate allocation.
|
||||
-}
|
||||
getSource : Parser String
|
||||
getSource =
|
||||
I.Parser <| \state -> I.Good state.source state
|
||||
|
||||
Reference in New Issue
Block a user