Add some parsing stuff to finished/ for Tag

This commit is contained in:
Richard Feldman
2018-05-04 19:30:58 -04:00
parent 82e848ba97
commit 2bd0c78583
24 changed files with 2565 additions and 48 deletions

View File

@@ -15,6 +15,7 @@
"elm-lang/html": "2.0.0 <= v < 3.0.0",
"elm-lang/http": "1.0.0 <= v < 2.0.0",
"elm-lang/navigation": "2.1.0 <= v < 3.0.0",
"elm-tools/parser": "2.0.1 <= v < 3.0.0",
"evancz/elm-markdown": "3.0.2 <= v < 4.0.0",
"evancz/url-parser": "2.0.1 <= v < 3.0.0",
"lukewestby/elm-http-builder": "5.1.0 <= v < 6.0.0",

View File

@@ -1,6 +1,7 @@
{
"rtfeldman/elm-validate": "3.0.0",
"rtfeldman/selectlist": "1.0.0",
"elm-tools/parser-primitives": "1.0.0",
"elm-lang/navigation": "2.1.0",
"elm-lang/virtual-dom": "2.0.4",
"evancz/url-parser": "2.0.1",
@@ -8,6 +9,7 @@
"evancz/elm-markdown": "3.0.2",
"elm-lang/dom": "1.1.1",
"elm-lang/html": "2.0.0",
"elm-tools/parser": "2.0.1",
"elm-community/json-extra": "2.7.0",
"elm-lang/http": "1.0.0",
"lukewestby/elm-http-builder": "5.2.0",

View File

@@ -0,0 +1 @@
elm-stuff

View File

@@ -0,0 +1,27 @@
Copyright (c) 2017-present, Evan Czaplicki
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of the {organization} nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@@ -0,0 +1,7 @@
# Parser Primitives
**In 99.9999% of cases, you do not want this.**
When creating a parser combinator library like [`elm-tools/parser`](https://github.com/elm-tools/parser), you want lower-level access to strings to get better performance.
This package exposes these low-level functions so that `elm-tools/parser` does not have an unfair performance advantage.

View File

@@ -0,0 +1,17 @@
{
"version": "1.0.0",
"summary": "Fast (but safe) primitives for creating parsing packages",
"repository": "https://github.com/elm-tools/parser-primitives.git",
"license": "BSD-3-Clause",
"source-directories": [
"src"
],
"exposed-modules": [
"ParserPrimitives"
],
"dependencies": {
"elm-lang/core": "5.0.0 <= v < 6.0.0"
},
"native-modules": true,
"elm-version": "0.18.0 <= v < 0.19.0"
}

View File

@@ -0,0 +1,130 @@
var _elm_tools$parser_primitives$Native_ParserPrimitives = function() {
// STRINGS
function isSubString(smallString, offset, row, col, bigString)
{
var smallLength = smallString.length;
var bigLength = bigString.length - offset;
if (bigLength < smallLength)
{
return tuple3(-1, row, col);
}
for (var i = 0; i < smallLength; i++)
{
var char = smallString[i];
if (char !== bigString[offset + i])
{
return tuple3(-1, row, col);
}
// if it is a two word character
if ((bigString.charCodeAt(offset) & 0xF800) === 0xD800)
{
i++
if (smallString[i] !== bigString[offset + i])
{
return tuple3(-1, row, col);
}
col++;
continue;
}
// if it is a newline
if (char === '\n')
{
row++;
col = 1;
continue;
}
// if it is a one word character
col++
}
return tuple3(offset + smallLength, row, col);
}
function tuple3(a, b, c)
{
return { ctor: '_Tuple3', _0: a, _1: b, _2: c };
}
// CHARS
var mkChar = _elm_lang$core$Native_Utils.chr;
function isSubChar(predicate, offset, string)
{
if (offset >= string.length)
{
return -1;
}
if ((string.charCodeAt(offset) & 0xF800) === 0xD800)
{
return predicate(mkChar(string.substr(offset, 2)))
? offset + 2
: -1;
}
var char = string[offset];
return predicate(mkChar(char))
? ((char === '\n') ? -2 : (offset + 1))
: -1;
}
// FIND STRING
function findSubString(before, smallString, offset, row, col, bigString)
{
var newOffset = bigString.indexOf(smallString, offset);
if (newOffset === -1)
{
return tuple3(-1, row, col);
}
var scanTarget = before ? newOffset : newOffset + smallString.length;
while (offset < scanTarget)
{
var char = bigString[offset];
if (char === '\n')
{
offset++;
row++;
col = 1;
continue;
}
if ((bigString.charCodeAt(offset) & 0xF800) === 0xD800)
{
offset += 2;
col++;
continue;
}
offset++;
col++;
}
return tuple3(offset, row, col);
}
return {
isSubString: F5(isSubString),
isSubChar: F3(isSubChar),
findSubString: F6(findSubString)
};
}();

View File

@@ -0,0 +1,109 @@
module ParserPrimitives exposing
( isSubString
, isSubChar
, findSubString
)
{-| Low-level functions for creating parser combinator libraries.
@docs isSubString, isSubChar, findSubString
-}
import Native.ParserPrimitives
-- STRINGS
{-| When making a fast parser, you want to avoid allocation as much as
possible. That means you never want to mess with the source string, only
keep track of an offset into that string.
You use `isSubString` like this:
isSubString "let" offset row col "let x = 4 in x"
--==> ( newOffset, newRow, newCol )
You are looking for `"let"` at a given `offset`. On failure, the
`newOffset` is `-1`. On success, the `newOffset` is the new offset. With
our `"let"` example, it would be `offset + 3`.
You also provide the current `row` and `col` which do not align with
`offset` in a clean way. For example, when you see a `\n` you are at
`row = row + 1` and `col = 1`. Furthermore, some UTF16 characters are
two words wide, so even if there are no newlines, `offset` and `col`
may not be equal.
-}
isSubString : String -> Int -> Int -> Int -> String -> (Int, Int, Int)
isSubString =
Native.ParserPrimitives.isSubString
-- CHARACTERS
{-| Again, when parsing, you want to allocate as little as possible.
So this function lets you say:
isSubChar isSpace offset "this is the source string"
--==> newOffset
The `(Char -> Bool)` argument is called a predicate.
The `newOffset` value can be a few different things:
- `-1` means that the predicate failed
- `-2` means the predicate succeeded with a `\n`
- otherwise you will get `offset + 1` or `offset + 2`
depending on whether the UTF16 character is one or two
words wide.
It is better to use union types in general, but it is worth the
danger *within* parsing libraries to get the benefit *outside*.
So you can write a `chomp` function like this:
chomp : (Char -> Bool) -> Int -> Int -> Int -> String -> (Int, Int, Int)
chomp isGood offset row col source =
let
newOffset =
Prim.isSubChar isGood offset source
in
-- no match
if newOffset == -1 then
(offset, row, col)
-- newline match
else if newOffset == -2 then
chomp isGood (offset + 1) (row + 1) 1 source
-- normal match
else
chomp isGood newOffset row (col + 1) source
Notice that `chomp` can be tail-call optimized, so this turns into a
`while` loop under the hood.
-}
isSubChar : (Char -> Bool) -> Int -> String -> Int
isSubChar =
Native.ParserPrimitives.isSubChar
-- INDEX
{-| Find a substring after a given offset.
findSubString before "42" offset row col "Is 42 the answer?"
--==> (newOffset, newRow, newCol)
If `offset = 0` and `before = True` we would get `(3, 1, 4)`
If `offset = 0` and `before = False` we would get `(5, 1, 6)`
If `offset = 7` we would get `(-1, 1, 18)`
-}
findSubString : Bool -> String -> Int -> Int -> Int -> String -> (Int, Int, Int)
findSubString =
Native.ParserPrimitives.findSubString

View File

@@ -0,0 +1 @@
elm-stuff

View File

@@ -0,0 +1,27 @@
Copyright (c) 2017-present, Evan Czaplicki
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of the {organization} nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@@ -0,0 +1,170 @@
# Parser + Nice Error Messages
Goals:
- Make writing parsers as simple and fun as possible.
- Produce excellent error messages.
- Go pretty fast.
This is achieved with a couple concepts that I have not seen in any other parser libraries: [parser pipelines](#parser-pipelines), [tracking context](#tracking-context), and [delayed commits](#delayed-commits).
## Parser Pipelines
To parse a 2D point like `( 3, 4 )`, you might create a `point` parser like this:
```elm
import Parser exposing (Parser, (|.), (|=), succeed, symbol, float, ignore, zeroOrMore)
type alias Point =
{ x : Float
, y : Float
}
point : Parser Point
point =
succeed Point
|. symbol "("
|. spaces
|= float
|. spaces
|. symbol ","
|. spaces
|= float
|. spaces
|. symbol ")"
spaces : Parser ()
spaces =
ignore zeroOrMore (\c -> c == ' ')
```
All the interesting stuff is happening in `point`. It uses two operators:
- [`(|.)`][ignore] means “parse this, but **ignore** the result”
- [`(|=)`][keep] means “parse this, and **keep** the result”
So the `Point` function only gets the result of the two `float` parsers.
[ignore]: http://package.elm-lang.org/packages/elm-tools/parser/latest/Parser#|.
[keep]: http://package.elm-lang.org/packages/elm-tools/parser/latest/Parser#|=
The theory is that `|=` introduces more “visual noise” than `|.`, making it pretty easy to pick out which lines in the pipeline are important.
I recommend having one line per operator in your parser pipeline. If you need multiple lines for some reason, use a `let` or make a helper function.
## Tracking Context
Most parsers tell you the row and column of the problem:
Something went wrong at (4:17)
That may be true, but it is not how humans think. It is how text editors think! It would be better to say:
I found a problem with this list:
[ 1, 23zm5, 3 ]
^
I wanted an integer, like 6 or 90219.
Notice that the error messages says `this list`. That is context! That is the language my brain speaks, not rows and columns.
This parser package lets you annotate context with the [`inContext`][inContext] function. You can let the parser know “I am trying to parse a `"list"` right now” so if an error happens anywhere in that context, you get the hand annotation!
[inContext]: http://package.elm-lang.org/packages/elm-tools/parser/latest/Parser#inContext
> **Note:** This technique is used by the parser in the Elm compiler to give more helpful error messages.
## Delayed Commits
To make fast parsers with precise error messages, this package lets you control when a parser **commits** to a certain path.
For example, you are trying to parse the following list:
```elm
[ 1, 23zm5, 3 ]
```
Ideally, you want the error at the `z`, but the libraries I have seen make this difficult to achieve efficiently. You often end up with an error at `[` because “something went wrong”.
**This package introduces [`delayedCommit`][delayedCommit] to resolve this.**
Say we want to create `intList`, a parser for comma separated lists of integers like `[1, 2, 3]`. We would say something like this:
[delayedCommit]: http://package.elm-lang.org/packages/elm-tools/parser/latest/Parser#delayedCommit
```elm
import Parser exposing (..)
{-| We start by ignoring the opening square brace and some spaces.
We only really care about the numbers, so we parse an `int` and
then use `intListHelp` to start chomping other list entries.
-}
intList : Parser (List Int)
intList =
succeed identity
|. symbol "["
|. spaces
|= andThen (\n -> intListHelp [n]) int
|. spaces
|. symbol "]"
{-| `intListHelp` checks if there is a `nextInt`. If so, it
continues trying to find more list items. If not, it gives
back the list of integers we have accumulated so far.
-}
intListHelp : List Int -> Parser (List Int)
intListHelp revInts =
oneOf
[ nextInt
|> andThen (\n -> intListHelp (n :: revInts))
, succeed (List.reverse revInts)
]
```
Now we get to the tricky part! How do we define `nextInt`? Here are two approaches, but only the second one actually works!
```elm
-- BAD
badNextInt : Parser Int
badNextInt =
succeed identity
|. spaces
|. symbol ","
|. spaces
|= int
-- GOOD
nextInt : Parser Int
nextInt =
delayedCommit spaces <|
succeed identity
|. symbol ","
|. spaces
|= int
```
The `badNextInt` looks pretty normal, but it will not work. It commits as soon as the first `spaces` parser succeeds. It fails in the following situation:
```elm
[ 1, 2, 3 ]
^
```
When we get to the closing `]` we have already successfully parsed some spaces. That means we are commited to `badNextInt` and need a comma. That fails, so the whole parse fails!
With `nextInt`, the [`delayedCommit`][delayedCommit] function is saying to parse `spaces` but only commit if progress is made *after* that. So we are only commited to this parser if we see a comma.
<br>
<br>
## [Comparison with Prior Work](https://github.com/elm-tools/parser/blob/master/comparison.md)

View File

@@ -0,0 +1,69 @@
## Comparison with Prior Work
I have not seen the [parser pipeline][1] or the [context stack][2] ideas in other libraries, but [delayed commits][3] relate to prior work.
[1]: README.md#parser-pipelines
[2]: README.md#tracking-context
[3]: README.md#delayed-commits
Most parser combinator libraries I have seen are based on Haskells Parsec library, which has primitives named `try` and `lookAhead`. I believe [`delayedCommitMap`][delayedCommitMap] is a better primitive for two reasons.
[delayedCommitMap]: http://package.elm-lang.org/packages/elm-tools/parser/latest/Parser#delayedCommitMap
### Performance and Composition
Say we want to create a precise error message for `length [1,,3]`. The naive approach with Haskells Parsec library produces very bad error messages:
```haskell
spaceThenArg :: Parser Expr
spaceThenArg =
try (spaces >> term)
```
This means we get a precise error from `term`, but then throw it away and say something went wrong at the space before the `[`. Very confusing! To improve quality, we must write something like this:
```haskell
spaceThenArg :: Parser Expr
spaceThenArg =
choice
[ do lookAhead (spaces >> char '[')
spaces
term
, try (spaces >> term)
]
```
Notice that we parse `spaces` twice no matter what.
Notice that we also had to hardcode `[` in the `lookAhead`. What if we update `term` to parse records that start with `{` as well? To get good commits on records, we must remember to update `lookAhead` to look for `oneOf "[{"`. Implementation details are leaking out of `term`!
With `delayedCommit` in this Elm library, you can just say:
```elm
spaceThenArg : Parser Expr
spaceThenArg =
delayedCommit spaces term
```
It does less work, and is more reliable as `term` evolves. I believe `delayedCommit` makes `lookAhead` pointless.
### Expressiveness
You can define `try` in terms of [`delayedCommitMap`][delayedCommitMap] like this:
```elm
try : Parser a -> Parser a
try parser =
delayedCommitMap always parser (succeed ())
```
No expressiveness is lost!
While it is possible to define `try`, I left it out of this package. In practice, `try` often leads to “bad commits” where your parser fails in a very specific way, but you then backtrack to a less specific error message. I considered naming it `allOrNothing` to better explain how it changes commit behavior, but ultimately, I thought it was best to encourage users to express their parsers with `delayedCommit` directly.
### Summary
Compared to previous work, `delayedCommit` lets you produce precise error messages **more efficiently**. By thinking about “commit behavior” directly, you also end up with **cleaner composition** of parsers. And these benefits come **without any loss of expressiveness**.

View File

@@ -0,0 +1,19 @@
{
"version": "2.0.1",
"summary": "a parsing library, focused on simplicity and great error messages",
"repository": "https://github.com/elm-tools/parser.git",
"license": "BSD-3-Clause",
"source-directories": [
"src"
],
"exposed-modules": [
"Parser",
"Parser.LanguageKit",
"Parser.LowLevel"
],
"dependencies": {
"elm-lang/core": "5.1.0 <= v < 6.0.0",
"elm-tools/parser-primitives": "1.0.0 <= v < 2.0.0"
},
"elm-version": "0.18.0 <= v < 0.19.0"
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,149 @@
module Parser.Internal exposing
( Parser(..)
, Step(..)
, State
, chomp
, chompDigits
, chompDotAndExp
, isBadIntEnd
)
import Char
import ParserPrimitives as Prim
-- PARSERS
type Parser ctx x a =
Parser (State ctx -> Step ctx x a)
type Step ctx x a
= Good a (State ctx)
| Bad x (State ctx)
type alias State ctx =
{ source : String
, offset : Int
, indent : Int
, context : List ctx
, row : Int
, col : Int
}
-- CHOMPERS
chomp : (Char -> Bool) -> Int -> String -> Int
chomp isGood offset source =
let
newOffset =
Prim.isSubChar isGood offset source
in
if newOffset < 0 then
offset
else
chomp isGood newOffset source
-- CHOMP DIGITS
chompDigits : (Char -> Bool) -> Int -> String -> Result Int Int
chompDigits isValidDigit offset source =
let
newOffset =
chomp isValidDigit offset source
in
-- no digits
if newOffset == offset then
Err newOffset
-- ends with non-digit characters
else if Prim.isSubChar isBadIntEnd newOffset source /= -1 then
Err newOffset
-- all valid digits!
else
Ok newOffset
isBadIntEnd : Char -> Bool
isBadIntEnd char =
Char.isDigit char
|| Char.isUpper char
|| Char.isLower char
|| char == '.'
-- CHOMP FLOAT STUFF
chompDotAndExp : Int -> String -> Result Int Int
chompDotAndExp offset source =
let
dotOffset =
Prim.isSubChar isDot offset source
in
if dotOffset == -1 then
chompExp offset source
else
chompExp (chomp Char.isDigit dotOffset source) source
isDot : Char -> Bool
isDot char =
char == '.'
chompExp : Int -> String -> Result Int Int
chompExp offset source =
let
eOffset =
Prim.isSubChar isE offset source
in
if eOffset == -1 then
Ok offset
else
let
opOffset =
Prim.isSubChar isPlusOrMinus eOffset source
expOffset =
if opOffset == -1 then eOffset else opOffset
in
if Prim.isSubChar isZero expOffset source /= -1 then
Err expOffset
else if Prim.isSubChar Char.isDigit expOffset source == -1 then
Err expOffset
else
chompDigits Char.isDigit expOffset source
isE : Char -> Bool
isE char =
char == 'e' || char == 'E'
isZero : Char -> Bool
isZero char =
char == '0'
isPlusOrMinus : Char -> Bool
isPlusOrMinus char =
char == '+' || char == '-'

View File

@@ -0,0 +1,509 @@
module Parser.LanguageKit exposing
( variable
, list, record, tuple, sequence, Trailing(..)
, whitespace, LineComment(..), MultiComment(..)
)
{-|
# Variables
@docs variable
# Lists, records, and that sort of thing
@docs list, record, tuple, sequence, Trailing
# Whitespace
@docs whitespace, LineComment, MultiComment
-}
import Set exposing (Set)
import Parser exposing (..)
import Parser.Internal as I exposing (Step(..), State)
import ParserPrimitives as Prim
-- VARIABLES
{-| Create a parser for variables. It takes two `Char` checkers. The
first one is for the first character. The second one is for all the
other characters.
In Elm, we distinguish between upper and lower case variables, so we
can do something like this:
import Char
import Parser exposing (..)
import Parser.LanguageKit exposing (variable)
import Set
lowVar : Parser String
lowVar =
variable Char.isLower isVarChar keywords
capVar : Parser String
capVar =
variable Char.isUpper isVarChar keywords
isVarChar : Char -> Bool
isVarChar char =
Char.isLower char
|| Char.isUpper char
|| Char.isDigit char
|| char == '_'
keywords : Set.Set String
keywords =
Set.fromList [ "let", "in", "case", "of" ]
-}
variable : (Char -> Bool) -> (Char -> Bool) -> Set String -> Parser String
variable isFirst isOther keywords =
I.Parser <| \({ source, offset, indent, context, row, col } as state1) ->
let
firstOffset =
Prim.isSubChar isFirst offset source
in
if firstOffset == -1 then
Bad ExpectingVariable state1
else
let
state2 =
if firstOffset == -2 then
varHelp isOther (offset + 1) (row + 1) 1 source indent context
else
varHelp isOther firstOffset row (col + 1) source indent context
name =
String.slice offset state2.offset source
in
if Set.member name keywords then
Bad ExpectingVariable state1
else
Good name state2
varHelp : (Char -> Bool) -> Int -> Int -> Int -> String -> Int -> List ctx -> State ctx
varHelp isGood offset row col source indent context =
let
newOffset =
Prim.isSubChar isGood offset source
in
if newOffset == -1 then
{ source = source
, offset = offset
, indent = indent
, context = context
, row = row
, col = col
}
else if newOffset == -2 then
varHelp isGood (offset + 1) (row + 1) 1 source indent context
else
varHelp isGood newOffset row (col + 1) source indent context
-- SEQUENCES
{-| Parse a comma-separated list like `[ 1, 2, 3 ]`. You provide
a parser for the spaces and for the list items. So if you want
to parse a list of integers, you would say:
import Parser exposing (Parser)
import Parser.LanguageKit as Parser
intList : Parser (List Int)
intList =
Parser.list spaces Parser.int
spaces : Parser ()
spaces =
Parser.ignore zeroOrMore (\char -> char == ' ')
-- run intList "[]" == Ok []
-- run intList "[ ]" == Ok []
-- run intList "[1,2,3]" == Ok [1,2,3]
-- run intList "[ 1, 2, 3 ]" == Ok [1,2,3]
-- run intList "[ 1 , 2 , 3 ]" == Ok [1,2,3]
-- run intList "[ 1, 2, 3, ]" == Err ...
-- run intList "[, 1, 2, 3 ]" == Err ...
**Note:** If you want trailing commas, check out the
[`sequence`](#sequence) function.
-}
list : Parser () -> Parser a -> Parser (List a)
list spaces item =
sequence
{ start = "["
, separator = ","
, end = "]"
, spaces = spaces
, item = item
, trailing = Forbidden
}
{-| Help parse records like `{ a = 2, b = 2 }`. You provide
a parser for the spaces and for the list items, you might say:
import Parser exposing ( Parser, (|.), (|=), zeroOrMore )
import Parser.LanguageKit as Parser
record : Parser (List (String, Int))
record =
Parser.record spaces field
field : Parser (String, Int)
field =
Parser.succeed (,)
|= lowVar
|. spaces
|. Parser.symbol "="
|. spaces
|= int
spaces : Parser ()
spaces =
Parser.ignore zeroOrMore (\char -> char == ' ')
-- run record "{}" == Ok []
-- run record "{ }" == Ok []
-- run record "{ x = 3 }" == Ok [ ("x",3) ]
-- run record "{ x = 3, }" == Err ...
-- run record "{ x = 3, y = 4 }" == Ok [ ("x",3), ("y",4) ]
-- run record "{ x = 3, y = }" == Err ...
**Note:** If you want trailing commas, check out the
[`sequence`](#sequence) function.
-}
record : Parser () -> Parser a -> Parser (List a)
record spaces item =
sequence
{ start = "{"
, separator = ","
, end = "}"
, spaces = spaces
, item = item
, trailing = Forbidden
}
{-| Help parse tuples like `(3, 4)`. Works just like [`list`](#list)
and [`record`](#record). And if you need something custom, check out
the [`sequence`](#sequence) function.
-}
tuple : Parser () -> Parser a -> Parser (List a)
tuple spaces item =
sequence
{ start = "("
, separator = ","
, end = ")"
, spaces = spaces
, item = item
, trailing = Forbidden
}
{-| Handle things *like* lists and records, but you can customize the
details however you need. Say you want to parse C-style code blocks:
import Parser exposing (Parser)
import Parser.LanguageKit as Parser exposing (Trailing(..))
block : Parser (List Stmt)
block =
Parser.sequence
{ start = "{"
, separator = ";"
, end = "}"
, spaces = spaces
, item = statement
, trailing = Mandatory -- demand a trailing semi-colon
}
-- spaces : Parser ()
-- statement : Parser Stmt
**Note:** If you need something more custom, do not be afraid to check
out the implementation and customize it for your case. It is better to
get nice error messages with a lower-level implementation than to try
to hack high-level parsers to do things they are not made for.
-}
sequence
: { start : String
, separator : String
, end : String
, spaces : Parser ()
, item : Parser a
, trailing : Trailing
}
-> Parser (List a)
sequence { start, end, spaces, item, separator, trailing } =
symbol start
|- spaces
|- sequenceEnd end spaces item separator trailing
{-| Whats the deal with trailing commas? Are they `Forbidden`?
Are they `Optional`? Are they `Mandatory`? Welcome to [shapes
club](http://poorlydrawnlines.com/comic/shapes-club/)!
-}
type Trailing = Forbidden | Optional | Mandatory
ignore : Parser ignore -> Parser keep -> Parser keep
ignore ignoreParser keepParser =
map2 revAlways ignoreParser keepParser
(|-) : Parser ignore -> Parser keep -> Parser keep
(|-) =
ignore
revAlways : ignore -> keep -> keep
revAlways _ keep =
keep
sequenceEnd : String -> Parser () -> Parser a -> String -> Trailing -> Parser (List a)
sequenceEnd end spaces parseItem sep trailing =
let
chompRest item =
case trailing of
Forbidden ->
sequenceEndForbidden end spaces parseItem sep [item]
Optional ->
sequenceEndOptional end spaces parseItem sep [item]
Mandatory ->
spaces
|- symbol sep
|- spaces
|- sequenceEndMandatory end spaces parseItem sep [item]
in
oneOf
[ parseItem
|> andThen chompRest
, symbol end
|- succeed []
]
sequenceEndForbidden : String -> Parser () -> Parser a -> String -> List a -> Parser (List a)
sequenceEndForbidden end spaces parseItem sep revItems =
let
chompRest item =
sequenceEndForbidden end spaces parseItem sep (item :: revItems)
in
ignore spaces <|
oneOf
[ symbol sep
|- spaces
|- andThen chompRest parseItem
, symbol end
|- succeed (List.reverse revItems)
]
sequenceEndOptional : String -> Parser () -> Parser a -> String -> List a -> Parser (List a)
sequenceEndOptional end spaces parseItem sep revItems =
let
parseEnd =
andThen (\_ -> succeed (List.reverse revItems)) (symbol end)
chompRest item =
sequenceEndOptional end spaces parseItem sep (item :: revItems)
in
ignore spaces <|
oneOf
[ symbol sep
|- spaces
|- oneOf [ andThen chompRest parseItem, parseEnd ]
, parseEnd
]
sequenceEndMandatory : String -> Parser () -> Parser a -> String -> List a -> Parser (List a)
sequenceEndMandatory end spaces parseItem sep revItems =
let
chompRest item =
sequenceEndMandatory end spaces parseItem sep (item :: revItems)
in
oneOf
[ andThen chompRest <|
parseItem
|. spaces
|. symbol sep
|. spaces
, symbol end
|- succeed (List.reverse revItems)
]
-- WHITESPACE
{-| Create a custom whitespace parser. It will always chomp the
`' '`, `'\r'`, and `'\n'` characters, but you can customize some
other things. Here are some examples:
elm : Parser ()
elm =
whitespace
{ allowTabs = False
, lineComment = LineComment "--"
, multiComment = NestableComment "{-" "-}"
}
js : Parser ()
js =
whitespace
{ allowTabs = True
, lineComment = LineComment "//"
, multiComment = UnnestableComment "/*" "*/"
}
If you need further customization, please open an issue describing your
scenario or check out the source code and write it yourself. This is all
built using stuff from the root `Parser` module.
-}
whitespace
: { allowTabs : Bool
, lineComment : LineComment
, multiComment : MultiComment
}
-> Parser ()
whitespace { allowTabs, lineComment, multiComment } =
let
tabParser =
if allowTabs then
[ Parser.ignore zeroOrMore isTab ]
else
[]
lineParser =
case lineComment of
NoLineComment ->
[]
LineComment start ->
[ symbol start
|. ignoreUntil "\n"
]
multiParser =
case multiComment of
NoMultiComment ->
[]
UnnestableComment start end ->
[ symbol start
|. ignoreUntil end
]
NestableComment start end ->
[ nestableComment start end
]
in
whitespaceHelp <|
oneOf (tabParser ++ lineParser ++ multiParser)
chompSpaces : Parser ()
chompSpaces =
Parser.ignore zeroOrMore isSpace
isSpace : Char -> Bool
isSpace char =
char == ' ' || char == '\n' || char == '\r'
isTab : Char -> Bool
isTab char =
char == '\t'
whitespaceHelp : Parser a -> Parser ()
whitespaceHelp parser =
ignore chompSpaces <|
oneOf [ andThen (\_ -> whitespaceHelp parser) parser, succeed () ]
{-| Are line comments allowed? If so, what symbol do they start with?
LineComment "--" -- Elm
LineComment "//" -- JS
LineComment "#" -- Python
NoLineComment -- OCaml
-}
type LineComment = NoLineComment | LineComment String
{-| Are multi-line comments allowed? If so, what symbols do they start
and end with?
NestableComment "{-" "-}" -- Elm
UnnestableComment "/*" "*/" -- JS
NoMultiComment -- Python
In Elm, you can nest multi-line comments. In C-like languages, like JS,
this is not allowed. As soon as you see a `*/` the comment is over no
matter what.
-}
type MultiComment
= NoMultiComment
| NestableComment String String
| UnnestableComment String String
nestableComment : String -> String -> Parser ()
nestableComment start end =
case (String.uncons start, String.uncons end) of
(Nothing, _) ->
fail "Trying to parse a multi-line comment, but the start token cannot be the empty string!"
(_, Nothing) ->
fail "Trying to parse a multi-line comment, but the end token cannot be the empty string!"
( Just (startChar, _), Just (endChar, _) ) ->
let
isNotRelevant char =
char /= startChar && char /= endChar
in
symbol start
|. nestableCommentHelp isNotRelevant start end 1
nestableCommentHelp : (Char -> Bool) -> String -> String -> Int -> Parser ()
nestableCommentHelp isNotRelevant start end nestLevel =
lazy <| \_ ->
ignore (Parser.ignore zeroOrMore isNotRelevant) <|
oneOf
[ ignore (symbol end) <|
if nestLevel == 1 then
succeed ()
else
nestableCommentHelp isNotRelevant start end (nestLevel - 1)
, ignore (symbol start) <|
nestableCommentHelp isNotRelevant start end (nestLevel + 1)
, ignore (Parser.ignore (Exactly 1) isChar) <|
nestableCommentHelp isNotRelevant start end nestLevel
]
isChar : Char -> Bool
isChar char =
True

View File

@@ -0,0 +1,118 @@
module Parser.LowLevel exposing
( getIndentLevel
, withIndentLevel
, getPosition
, getRow
, getCol
, getOffset
, getSource
)
{-| You are unlikely to need any of this under normal circumstances.
# Indentation
@docs getIndentLevel, withIndentLevel
# Row, Column, Offset, and Source
@docs getPosition, getRow, getCol, getOffset, getSource
-}
import Parser exposing (Parser)
import Parser.Internal as I exposing (State)
-- INDENTATION
{-| This parser tracks indentation level so you can parse indentation
sensitive languages. Indentation levels correspond to column numbers, so
it starts at 1.
-}
getIndentLevel : Parser Int
getIndentLevel =
I.Parser <| \state -> I.Good state.indent state
{-| Run a parser with a given indentation level. So you will likely
use `getCol` to get the current column, `andThen` give that to
`withIndentLevel`.
-}
withIndentLevel : Int -> Parser a -> Parser a
withIndentLevel newIndent (I.Parser parse) =
I.Parser <| \state1 ->
case parse (changeIndent newIndent state1) of
I.Good a state2 ->
I.Good a (changeIndent state1.indent state2)
I.Bad x state2 ->
I.Bad x (changeIndent state1.indent state2)
changeIndent : Int -> State ctx -> State ctx
changeIndent newIndent { source, offset, context, row, col } =
{ source = source
, offset = offset
, indent = newIndent
, context = context
, row = row
, col = col
}
-- POSITION
{-| Code editors treat code like a grid. There are rows and columns.
In most editors, rows and colums are 1-indexed. You move to a new row
whenever you see a `\n` character.
The `getPosition` parser succeeds with your current row and column
within the string you are parsing.
-}
getPosition : Parser (Int, Int)
getPosition =
I.Parser <| \state -> I.Good (state.row, state.col) state
{-| The `getRow` parser succeeds with your current row within
the string you are parsing.
-}
getRow : Parser Int
getRow =
I.Parser <| \state -> I.Good state.row state
{-| The `getCol` parser succeeds with your current column within
the string you are parsing.
-}
getCol : Parser Int
getCol =
I.Parser <| \state -> I.Good state.col state
{-| Editors think of code as a grid, but behind the scenes it is just
a flat array of UTF16 characters. `getOffset` tells you your index in
that flat array. So if you have read `"\n\n\n\n"` you are on row 5,
column 1, and offset 4.
**Note:** browsers use UTF16 strings, so characters may be one or two 16-bit
words. This means you can read 4 characters, but your offset will move by 8.
-}
getOffset : Parser Int
getOffset =
I.Parser <| \state -> I.Good state.offset state
{-| Get the entire string you are parsing right now. Paired with
`getOffset` this can let you use `String.slice` to grab substrings
with very little intermediate allocation.
-}
getSource : Parser String
getSource =
I.Parser <| \state -> I.Good state.source state

View File

@@ -3,15 +3,12 @@ module Data.Article
( Article
, Body
, Slug
, Tag
, bodyToHtml
, bodyToMarkdownString
, decoder
, decoderWithBody
, slugParser
, slugToString
, tagDecoder
, tagToString
)
import Data.Article.Author as Author exposing (Author)
@@ -110,24 +107,6 @@ slugToString (Slug slug) =
-- TAGS --
type Tag
= Tag String
tagToString : Tag -> String
tagToString (Tag slug) =
slug
tagDecoder : Decoder Tag
tagDecoder =
Decode.map Tag Decode.string
-- BODY --

View File

@@ -0,0 +1,55 @@
module Data.Article.Tag
exposing
( Tag
, decoder
, encode
, listParser
, toString
)
import Json.Decode as Decode exposing (Decoder)
import Json.Encode as Encode exposing (Value)
import Parser exposing ((|.), (|=), Parser, end, ignore, keep, oneOrMore, repeat, zeroOrMore)
type Tag
= Tag String
toString : Tag -> String
toString (Tag str) =
str
encode : Tag -> Value
encode (Tag str) =
Encode.string str
decoder : Decoder Tag
decoder =
Decode.map Tag Decode.string
listParser : Parser (List Tag)
listParser =
Parser.succeed (List.map Tag)
|. ignore zeroOrMore isWhitespace
|= repeat zeroOrMore tag
|. end
-- INTERNAL --
tag : Parser String
tag =
keep oneOrMore (\char -> not (isWhitespace char))
|. ignore zeroOrMore isWhitespace
isWhitespace : Char -> Bool
isWhitespace char =
-- Treat hashtags and commas as effectively whitespace; ignore them.
char == '#' || char == ',' || char == ' '

View File

@@ -1,6 +1,7 @@
module Page.Article.Editor exposing (Model, Msg, initEdit, initNew, update, view)
import Data.Article as Article exposing (Article, Body)
import Data.Article.Tag as Tag exposing (Tag)
import Data.Session exposing (Session)
import Data.User exposing (User)
import Html exposing (..)
@@ -8,6 +9,7 @@ import Html.Attributes exposing (attribute, class, defaultValue, disabled, href,
import Html.Events exposing (onInput, onSubmit)
import Http
import Page.Errored exposing (PageLoadError, pageLoadError)
import Parser
import Request.Article
import Route
import Task exposing (Task)
@@ -26,7 +28,7 @@ type alias Model =
, title : String
, body : String
, description : String
, tags : List String
, tags : String
, isSaving : Bool
}
@@ -38,7 +40,7 @@ initNew =
, title = ""
, body = ""
, description = ""
, tags = []
, tags = ""
, isSaving = False
}
@@ -60,7 +62,7 @@ initEdit session slug =
, title = article.title
, body = Article.bodyToMarkdownString article.body
, description = article.description
, tags = article.tags
, tags = String.join " " article.tags
, isSaving = False
}
)
@@ -121,7 +123,7 @@ viewForm model =
, Form.input
[ placeholder "Enter tags"
, onInput SetTags
, defaultValue (String.join " " model.tags)
, defaultValue model.tags
]
[]
, button [ class "btn btn-lg pull-xs-right btn-primary", disabled model.isSaving ]
@@ -152,10 +154,24 @@ update user msg model =
[] ->
case model.editingArticle of
Nothing ->
user.token
|> Request.Article.create model
|> Http.send CreateCompleted
|> pair { model | errors = [], isSaving = True }
case Parser.run Tag.listParser model.tags of
Ok tags ->
let
request =
Request.Article.create
{ tags = tags
, title = model.title
, body = model.body
, description = model.description
}
user.token
in
request
|> Http.send CreateCompleted
|> pair { model | errors = [], isSaving = True }
Err _ ->
( { model | errors = [ ( Tags, "Invalid tags." ) ] }, Cmd.none )
Just slug ->
user.token
@@ -173,7 +189,7 @@ update user msg model =
( { model | description = description }, Cmd.none )
SetTags tags ->
( { model | tags = tagsFromString tags }, Cmd.none )
( { model | tags = tags }, Cmd.none )
SetBody body ->
( { model | body = body }, Cmd.none )
@@ -217,6 +233,7 @@ type Field
= Form
| Title
| Body
| Tags
type alias Error =
@@ -235,14 +252,6 @@ modelValidator =
-- INTERNAL --
tagsFromString : String -> List String
tagsFromString str =
str
|> String.split " "
|> List.map String.trim
|> List.filter (not << String.isEmpty)
redirectToArticle : Article.Slug -> Cmd msg
redirectToArticle =
Route.modifyUrl << Route.Article

View File

@@ -3,7 +3,7 @@ module Page.Home exposing (Model, Msg, init, update, view)
{-| The homepage. You can get here via either the / or /#/ routes.
-}
import Data.Article as Article exposing (Tag)
import Data.Article.Tag as Tag exposing (Tag)
import Data.Session exposing (Session)
import Html exposing (..)
import Html.Attributes exposing (attribute, class, classList, href, id, placeholder)
@@ -100,7 +100,7 @@ viewTag tagName =
, href "javascript:void(0)"
, onClick (SelectTag tagName)
]
[ text (Article.tagToString tagName) ]
[ text (Tag.toString tagName) ]

View File

@@ -14,8 +14,9 @@ module Request.Article
, update
)
import Data.Article as Article exposing (Article, Body, Tag, slugToString)
import Data.Article as Article exposing (Article, Body, slugToString)
import Data.Article.Feed as Feed exposing (Feed)
import Data.Article.Tag as Tag exposing (Tag)
import Data.AuthToken exposing (AuthToken, withAuthorization)
import Data.User as User exposing (Username)
import Http
@@ -68,7 +69,7 @@ defaultListConfig =
list : ListConfig -> Maybe AuthToken -> Http.Request Feed
list config maybeToken =
[ ( "tag", Maybe.map Article.tagToString config.tag )
[ ( "tag", Maybe.map Tag.toString config.tag )
, ( "author", Maybe.map User.usernameToString config.author )
, ( "favorited", Maybe.map User.usernameToString config.favorited )
, ( "limit", Just (toString config.limit) )
@@ -114,7 +115,7 @@ feed config token =
tags : Http.Request (List Tag)
tags =
Decode.field "tags" (Decode.list Article.tagDecoder)
Decode.field "tags" (Decode.list Tag.decoder)
|> Http.get (apiUrl "/tags")
@@ -169,7 +170,7 @@ type alias CreateConfig record =
| title : String
, description : String
, body : String
, tags : List String
, tags : List Tag
}
@@ -194,7 +195,7 @@ create config token =
[ ( "title", Encode.string config.title )
, ( "description", Encode.string config.description )
, ( "body", Encode.string config.body )
, ( "tagList", Encode.list (List.map Encode.string config.tags) )
, ( "tagList", Encode.list (List.map Tag.encode config.tags) )
]
body =

View File

@@ -1,6 +1,6 @@
module Request.Article.Comments exposing (delete, list, post)
import Data.Article as Article exposing (Article, Tag, slugToString)
import Data.Article as Article exposing (Article, slugToString)
import Data.Article.Comment as Comment exposing (Comment, CommentId)
import Data.AuthToken exposing (AuthToken, withAuthorization)
import Http

View File

@@ -16,8 +16,9 @@ overkill, so we use simpler APIs instead.
-}
import Data.Article as Article exposing (Article, Tag)
import Data.Article as Article exposing (Article)
import Data.Article.Feed exposing (Feed)
import Data.Article.Tag as Tag exposing (Tag)
import Data.AuthToken exposing (AuthToken)
import Data.Session exposing (Session)
import Data.User exposing (Username)
@@ -126,7 +127,7 @@ sourceName source =
"Global Feed"
TagFeed tagName ->
"#" ++ Article.tagToString tagName
"#" ++ Tag.toString tagName
FavoritedFeed username ->
"Favorited Articles"