From 6627dfc326d9f133af5d472768696a01cc029aa2 Mon Sep 17 00:00:00 2001 From: Li Haoyi Date: Thu, 18 Oct 2018 12:35:39 +0800 Subject: [PATCH] make flatMap consume whitespace, introduce flatMapX that doesn't --- .../src/fastparse/internal/MacroImpls.scala | 24 +++++++++++++++++- fastparse/src/fastparse/package.scala | 25 +++++++++++++------ .../test/src/fastparse/ExampleTests.scala | 2 +- .../test/src/fastparse/IndentationTests.scala | 2 +- .../test/src/fastparse/ParsingTests.scala | 4 +-- pythonparse/src/pythonparse/Statements.scala | 2 +- readme/WritingParsers.scalatex | 5 ++++ 7 files changed, 50 insertions(+), 14 deletions(-) diff --git a/fastparse/src/fastparse/internal/MacroImpls.scala b/fastparse/src/fastparse/internal/MacroImpls.scala index 7d7da52b..a8988e3c 100644 --- a/fastparse/src/fastparse/internal/MacroImpls.scala +++ b/fastparse/src/fastparse/internal/MacroImpls.scala @@ -163,7 +163,7 @@ object MacroImpls { } - def flatMapMacro[T: c.WeakTypeTag, V: c.WeakTypeTag] + def flatMapXMacro[T: c.WeakTypeTag, V: c.WeakTypeTag] (c: Context) (f: c.Expr[T => ParsingRun[V]]): c.Expr[ParsingRun[V]] = { import c.universe._ @@ -176,6 +176,28 @@ object MacroImpls { } } + def flatMapMacro[T: c.WeakTypeTag, V: c.WeakTypeTag] + (c: Context) + (f: c.Expr[T => ParsingRun[V]]) + (whitespace: c.Expr[ParsingRun[Any] => ParsingRun[Unit]]): c.Expr[ParsingRun[V]] = { + import c.universe._ + + val lhs0 = c.prefix.asInstanceOf[c.Expr[EagerOps[T]]] + reify { + val lhs = lhs0.splice.parse0 + whitespace.splice match{ case ws => + if (!lhs.isSuccess) lhs.asInstanceOf[ParsingRun[V]] + else { + val oldCapturing = lhs.noDropBuffer + lhs.noDropBuffer = true + ws(lhs) + lhs.noDropBuffer = oldCapturing + f.splice(lhs.successValue.asInstanceOf[T]) + } + } + } + } + def eitherMacro[T: c.WeakTypeTag, V >: T: c.WeakTypeTag] (c: Context) (other: c.Expr[ParsingRun[V]]) diff --git a/fastparse/src/fastparse/package.scala b/fastparse/src/fastparse/package.scala index 0bfeb620..4a96ca40 100644 --- a/fastparse/src/fastparse/package.scala +++ b/fastparse/src/fastparse/package.scala @@ -177,12 +177,21 @@ package object fastparse { (implicit ctx: P[Any]): P[T] = macro MacroImpls.filterMacro[T] /** * Transforms the result of this parser using the given function into a - * new parser which is applied. Useful for doing dependent parsing, e.g. - * when parsing JSON you may first parse a character to see if it's a `[`, - * `{`, or `"`, and then deciding whether you next want to parse an array, - * dictionary or string. + * new parser which is applied (after whitespace). Useful for doing + * dependent parsing, e.g. when parsing JSON you may first parse a + * character to see if it's a `[`, `{`, or `"`, and then deciding whether + * you next want to parse an array, dictionary or string. */ - def flatMap[V](f: T => P[V]): P[V] = macro MacroImpls.flatMapMacro[T, V] + def flatMap[V](f: T => P[V]) + (implicit whitespace: P[Any] => P[Unit]): P[V] = macro MacroImpls.flatMapMacro[T, V] + /** + * Transforms the result of this parser using the given function into a + * new parser which is applied (without consuming whitespace). Useful for + * doing dependent parsing, e.g. when parsing JSON you may first parse a + * character to see if it's a `[`, `{`, or `"`, and then deciding whether + * you next want to parse an array, dictionary or string. + */ + def flatMapX[V](f: T => P[V]): P[V] = macro MacroImpls.flatMapXMacro[T, V] /** * Either-or operator: tries to parse the left-hand-side, and if that @@ -195,7 +204,7 @@ package object fastparse { /** * Capture operator; makes the parser return the span of input it parsed * as a [[String]], which can then be processed further using [[~]], - * [[map]] or [[flatMap]] + * [[map]] or [[flatMapX]] */ def !(implicit ctx: P[Any]): P[String] = macro MacroImpls.captureMacro @@ -590,9 +599,9 @@ package object fastparse { /** * Like [[AnyChar]], but returns the single character it parses. Useful - * together with [[EagerOps.flatMap]] to provide one-character-lookahead + * together with [[EagerOps.flatMapX]] to provide one-character-lookahead * style parsing: [[SingleChar]] consumes the single character, and then - * [[EagerOps.flatMap]] can `match` on that single character and decide + * [[EagerOps.flatMapX]] can `match` on that single character and decide * which downstream parser you wish to invoke */ def SingleChar(implicit ctx: P[_]): P[Char] = { diff --git a/fastparse/test/src/fastparse/ExampleTests.scala b/fastparse/test/src/fastparse/ExampleTests.scala index d86417b3..83ef65c8 100644 --- a/fastparse/test/src/fastparse/ExampleTests.scala +++ b/fastparse/test/src/fastparse/ExampleTests.scala @@ -192,7 +192,7 @@ object ExampleTests extends TestSuite{ 'flatMap{ def leftTag[_: P] = P( "<" ~ (!">" ~ AnyChar).rep(1).! ~ ">") def rightTag[_: P](s: String) = P( "" ) - def xml[_: P] = P( leftTag.flatMap(rightTag) ) + def xml[_: P] = P( leftTag.flatMapX(rightTag) ) val Parsed.Success("a", _) = parse("", xml(_)) val Parsed.Success("abcde", _) = parse("", xml(_)) diff --git a/fastparse/test/src/fastparse/IndentationTests.scala b/fastparse/test/src/fastparse/IndentationTests.scala index e93f8558..4f59527f 100644 --- a/fastparse/test/src/fastparse/IndentationTests.scala +++ b/fastparse/test/src/fastparse/IndentationTests.scala @@ -23,7 +23,7 @@ object IndentationTests extends TestSuite{ def number[_: P]: P[Int] = P( CharIn("0-9").rep(1).!.map(_.toInt) ) def deeper[_: P]: P[Int] = P( " ".rep(indent + 1).!.map(_.length) ) - def blockBody[_: P]: P[Seq[Int]] = "\n" ~ deeper.flatMap(i => + def blockBody[_: P]: P[Seq[Int]] = "\n" ~ deeper.flatMapX(i => new Parser(indent = i).factor.rep(1, sep = ("\n" + " " * i)./) ) def block[_: P]: P[Int] = P( CharIn("+\\-*/").! ~/ blockBody).map(eval) diff --git a/fastparse/test/src/fastparse/ParsingTests.scala b/fastparse/test/src/fastparse/ParsingTests.scala index 23423ab8..264288ce 100644 --- a/fastparse/test/src/fastparse/ParsingTests.scala +++ b/fastparse/test/src/fastparse/ParsingTests.scala @@ -216,7 +216,7 @@ object ParsingTests extends TestSuite{ // Broken out of the TestSuite block to avoid problems in our 2.10.x // build due to https://issues.scala-lang.org/browse/SI-7987 def checkFlatmap() = { - checkFail(implicit c => ("Hello" ~/ "Boo").flatMap(_ => Fail).?, ("HelloBoo", 0), 8) - checkFail(implicit c => (("Hello" ~/ "Boo").flatMap(_ => Pass) ~ Fail).?, ("HelloBoo", 0), 8) + checkFail(implicit c => ("Hello" ~/ "Boo").flatMapX(_ => Fail).?, ("HelloBoo", 0), 8) + checkFail(implicit c => (("Hello" ~/ "Boo").flatMapX(_ => Pass) ~ Fail).?, ("HelloBoo", 0), 8) } } diff --git a/pythonparse/src/pythonparse/Statements.scala b/pythonparse/src/pythonparse/Statements.scala index a5582c37..218d8686 100644 --- a/pythonparse/src/pythonparse/Statements.scala +++ b/pythonparse/src/pythonparse/Statements.scala @@ -189,7 +189,7 @@ class Statements(indent: Int){ _.collectFirst{ case (s, None) => s} }.filter(_.isDefined).map(_.get) } - def indented = P( deeper.flatMap{ nextIndent => + def indented = P( deeper.flatMapX{ nextIndent => new Statements(nextIndent).stmt.repX(1, spaces.repX(1) ~~ (" " * nextIndent | "\t" * nextIndent)).map(_.flatten) } ) P( indented | " ".rep ~ simple_stmt ) diff --git a/readme/WritingParsers.scalatex b/readme/WritingParsers.scalatex index 1ae9f026..becef61d 100644 --- a/readme/WritingParsers.scalatex +++ b/readme/WritingParsers.scalatex @@ -196,6 +196,11 @@ @p Which is equivalent and behaves exactly the same. + @p + Note that @code{.flatMap} consumes whitespace between the first + and second parsers; in cases where you do not want to do this, + use @code{.flatMapX} + @sect{Filter} @hl.ref(tests/"ExampleTests.scala", Seq("'filter", ""))