diff --git a/.github/ISSUE_TEMPLATE/string_scanner.md b/.github/ISSUE_TEMPLATE/string_scanner.md new file mode 100644 index 000000000..ad89f1b5b --- /dev/null +++ b/.github/ISSUE_TEMPLATE/string_scanner.md @@ -0,0 +1,5 @@ +--- +name: "package:string_scanner" +about: "Create a bug or file a feature request against package:string_scanner." +labels: "package:string_scanner" +--- \ No newline at end of file diff --git a/pkgs/string_scanner/.github/dependabot.yml b/pkgs/string_scanner/.github/dependabot.yml new file mode 100644 index 000000000..a19a66adf --- /dev/null +++ b/pkgs/string_scanner/.github/dependabot.yml @@ -0,0 +1,16 @@ +# Set update schedule for GitHub Actions +# See https://docs.github.com/en/free-pro-team@latest/github/administering-a-repository/keeping-your-actions-up-to-date-with-dependabot + +version: 2 +updates: + +- package-ecosystem: github-actions + directory: / + schedule: + interval: monthly + labels: + - autosubmit + groups: + github-actions: + patterns: + - "*" diff --git a/pkgs/string_scanner/.github/workflows/publish.yaml b/pkgs/string_scanner/.github/workflows/publish.yaml new file mode 100644 index 000000000..27157a046 --- /dev/null +++ b/pkgs/string_scanner/.github/workflows/publish.yaml @@ -0,0 +1,17 @@ +# A CI configuration to auto-publish pub packages. 
+ +name: Publish + +on: + pull_request: + branches: [ master ] + push: + tags: [ 'v[0-9]+.[0-9]+.[0-9]+' ] + +jobs: + publish: + if: ${{ github.repository_owner == 'dart-lang' }} + uses: dart-lang/ecosystem/.github/workflows/publish.yaml@main + permissions: + id-token: write # Required for authentication using OIDC + pull-requests: write # Required for writing the pull request note diff --git a/pkgs/string_scanner/.github/workflows/test-package.yml b/pkgs/string_scanner/.github/workflows/test-package.yml new file mode 100644 index 000000000..c60f71070 --- /dev/null +++ b/pkgs/string_scanner/.github/workflows/test-package.yml @@ -0,0 +1,64 @@ +name: Dart CI + +on: + # Run on PRs and pushes to the default branch. + push: + branches: [ master ] + pull_request: + branches: [ master ] + schedule: + - cron: "0 0 * * 0" + +env: + PUB_ENVIRONMENT: bot.github + +jobs: + # Check code formatting and static analysis on a single OS (linux) + # against Dart dev. + analyze: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + sdk: [dev] + steps: + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 + - uses: dart-lang/setup-dart@e630b99d28a3b71860378cafdc2a067c71107f94 + with: + sdk: ${{ matrix.sdk }} + - id: install + name: Install dependencies + run: dart pub get + - name: Check formatting + run: dart format --output=none --set-exit-if-changed . + if: always() && steps.install.outcome == 'success' + - name: Analyze code + run: dart analyze --fatal-infos + if: always() && steps.install.outcome == 'success' + + # Run tests on a matrix consisting of two dimensions: + # 1. OS: ubuntu-latest, (macos-latest, windows-latest) + # 2. release channel: dev + test: + needs: analyze + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + # Add macos-latest and/or windows-latest if relevant for this package. 
+ os: [ubuntu-latest] + sdk: [3.1, dev] + steps: + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 + - uses: dart-lang/setup-dart@e630b99d28a3b71860378cafdc2a067c71107f94 + with: + sdk: ${{ matrix.sdk }} + - id: install + name: Install dependencies + run: dart pub get + - name: Run VM tests + run: dart test --platform vm + if: always() && steps.install.outcome == 'success' + - name: Run Chrome tests + run: dart test --platform chrome + if: always() && steps.install.outcome == 'success' diff --git a/pkgs/string_scanner/.gitignore b/pkgs/string_scanner/.gitignore new file mode 100644 index 000000000..fb97bdebd --- /dev/null +++ b/pkgs/string_scanner/.gitignore @@ -0,0 +1,5 @@ +# Don’t commit the following directories created by pub. +.dart_tool/ +.pub/ +.packages +pubspec.lock diff --git a/pkgs/string_scanner/CHANGELOG.md b/pkgs/string_scanner/CHANGELOG.md new file mode 100644 index 000000000..082e9f201 --- /dev/null +++ b/pkgs/string_scanner/CHANGELOG.md @@ -0,0 +1,175 @@ +## 1.4.1 + +* Move to `dart-lang/tools` monorepo. + +## 1.4.0 + +* Fix `LineScanner`'s handling of `\r\n`'s to prevent errors when scanning + zero-length matches when between CR and LF. CR is treated as a new line only + if not immediately followed by a LF. +* Fix `LineScanner`'s updating of `column` when setting `position` if the + current position is not `0`. + +## 1.3.0 + +* Require Dart 3.1.0 + +* Add a `SpanScanner.spanFromPosition()` method which takes raw code units + rather than `SpanScanner.spanFrom()`'s `LineScannerState`s. + +## 1.2.0 + +* Require Dart 2.18.0 + +* Add better support for reading code points in the Unicode supplementary plane: + + * Added `StringScanner.readCodePoint()`, which consumes an entire Unicode code + point even if it's represented by two UTF-16 code units. + + * Added `StringScanner.peekCodePoint()`, which returns an entire Unicode code + point even if it's represented by two UTF-16 code units. 
+ + * `StringScanner.scanChar()` and `StringScanner.expectChar()` will now + properly consume two UTF-16 code units if they're passed Unicode code points + in the supplementary plane. + +## 1.1.1 + +* Populate the pubspec `repository` field. +* Switch to `package:lints`. +* Remove a dependency on `package:charcode`. + +## 1.1.0 + +* Stable release for null safety. + +## 1.1.0-nullsafety.3 + +* Update SDK constraints to `>=2.12.0-0 <3.0.0` based on beta release + guidelines. + +## 1.1.0-nullsafety.2 + +* Allow prerelease versions of the 2.12 sdk. + +## 1.1.0-nullsafety.1 + +- Allow 2.10 stable and 2.11.0 dev SDK versions. + +## 1.1.0-nullsafety + +- Migrate to null safety. + +## 1.0.5 + +- Added an example. + +- Update Dart SDK constraint to `>=2.0.0 <3.0.0`. + +## 1.0.4 + +* Add @alwaysThrows annotation to error method. + +## 1.0.3 + +* Set max SDK version to `<3.0.0`, and adjust other dependencies. + +## 1.0.2 + +* `SpanScanner` no longer crashes when creating a span that contains a UTF-16 + surrogate pair. + +## 1.0.1 + +* Fix the error text emitted by `StringScanner.expectChar()`. + +## 1.0.0 + +* **Breaking change**: `StringScanner.error()`'s `length` argument now defaults + to `0` rather than `1` when no match data is available. + +* **Breaking change**: `StringScanner.lastMatch` and related methods are now + reset when the scanner's position changes without producing a new match. + +**Note**: While the changes in `1.0.0` are user-visible, they're unlikely to +actually break any code in practice. Unless you know that your package is +incompatible with 0.1.x, consider using 0.1.5 as your lower bound rather +than 1.0.0. For example, `string_scanner: ">=0.1.5 <2.0.0"`. + +## 0.1.5 + +* Add `new SpanScanner.within()`, which scans within an existing `FileSpan`. + +* Add `StringScanner.scanChar()` and `StringScanner.expectChar()`. + +## 0.1.4+1 + +* Remove the dependency on `path`, since we don't actually import it. 
+ +## 0.1.4 + +* Add `new SpanScanner.eager()` for creating a `SpanScanner` that eagerly + computes its current line and column numbers. + +## 0.1.3+2 + +* Fix `LineScanner`'s handling of carriage returns to match that of + `SpanScanner`. + +## 0.1.3+1 + +* Fixed the homepage URL. + +## 0.1.3 + +* Add an optional `endState` argument to `SpanScanner.spanFrom`. + +## 0.1.2 + +* Add `StringScanner.substring`, which returns a substring of the source string. + +## 0.1.1 + +* Declare `SpanScanner`'s exposed `SourceSpan`s and `SourceLocation`s to be + `FileSpan`s and `FileLocation`s. They always were underneath, but callers may + now rely on it. + +* Add `SpanScanner.location`, which returns the scanner's current + `SourceLocation`. + +## 0.1.0 + +* Switch from `source_maps`' `Span` class to `source_span`'s `SourceSpan` class. + +* `new StringScanner()`'s `sourceUrl` parameter is now named to make it clear + that it can be safely `null`. + +* `new StringScannerException()` takes different arguments in a different order + to match `SpanFormatException`. + +* `StringScannerException.string` has been renamed to + `StringScannerException.source` to match the `FormatException` interface. + +## 0.0.3 + +* Make `StringScannerException` inherit from source_map's `SpanFormatException`. + +## 0.0.2 + +* `new StringScanner()` now takes an optional `sourceUrl` argument that provides + the URL of the source file. This is used for error reporting. + +* Add `StringScanner.readChar()` and `StringScanner.peekChar()` methods for + doing character-by-character scanning. + +* Scanners now throw `StringScannerException`s which provide more detailed + access to information about the errors that were thrown and can provide + terminal-colored messages. + +* Add a `LineScanner` subclass of `StringScanner` that automatically tracks line + and column information of the text being scanned. 
+ +* Add a `SpanScanner` subclass of `LineScanner` that exposes matched ranges as + [source map][] `Span` objects. + +[source map]: https://pub.dev/packages/source_maps diff --git a/pkgs/string_scanner/LICENSE b/pkgs/string_scanner/LICENSE new file mode 100644 index 000000000..000cd7bec --- /dev/null +++ b/pkgs/string_scanner/LICENSE @@ -0,0 +1,27 @@ +Copyright 2014, the Dart project authors. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + * Neither the name of Google LLC nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/pkgs/string_scanner/README.md b/pkgs/string_scanner/README.md new file mode 100644 index 000000000..e06e32567 --- /dev/null +++ b/pkgs/string_scanner/README.md @@ -0,0 +1,41 @@ +[![Dart CI](https://github.com/dart-lang/string_scanner/actions/workflows/test-package.yml/badge.svg)](https://github.com/dart-lang/string_scanner/actions/workflows/test-package.yml) +[![pub package](https://img.shields.io/pub/v/string_scanner.svg)](https://pub.dev/packages/string_scanner) +[![package publisher](https://img.shields.io/pub/publisher/string_scanner.svg)](https://pub.dev/packages/string_scanner/publisher) + +This package exposes a `StringScanner` type that makes it easy to parse a string +using a series of `Pattern`s. For example: + +```dart +import 'dart:math' as math; + +import 'package:string_scanner/string_scanner.dart'; + +num parseNumber(String source) { + // Scan a number ("1", "1.5", "-3"). + final scanner = StringScanner(source); + + // [Scanner.scan] tries to consume a [Pattern] and returns whether or not it + // succeeded. It will move the scan pointer past the end of the pattern. + final negative = scanner.scan('-'); + + // [Scanner.expect] consumes a [Pattern] and throws a [FormatError] if it + // fails. Like [Scanner.scan], it will move the scan pointer forward. + scanner.expect(RegExp(r'\d+')); + + // [Scanner.lastMatch] holds the [MatchData] for the most recent call to + // [Scanner.scan], [Scanner.expect], or [Scanner.matches]. + var number = num.parse(scanner.lastMatch![0]!); + + if (scanner.scan('.')) { + scanner.expect(RegExp(r'\d+')); + final decimal = scanner.lastMatch![0]!; + number += int.parse(decimal) / math.pow(10, decimal.length); + } + + // [Scanner.expectDone] will throw a [FormatError] if there's any input that + // hasn't yet been consumed. + scanner.expectDone(); + + return (negative ? 
-1 : 1) * number; +} +``` diff --git a/pkgs/string_scanner/analysis_options.yaml b/pkgs/string_scanner/analysis_options.yaml new file mode 100644 index 000000000..59f763a4d --- /dev/null +++ b/pkgs/string_scanner/analysis_options.yaml @@ -0,0 +1,32 @@ +# https://dart.dev/guides/language/analysis-options +include: package:dart_flutter_team_lints/analysis_options.yaml + +analyzer: + language: + strict-casts: true + strict-inference: true + strict-raw-types: true + +linter: + rules: + - avoid_bool_literals_in_conditional_expressions + - avoid_classes_with_only_static_members + - avoid_private_typedef_functions + - avoid_redundant_argument_values + - avoid_returning_this + - avoid_unused_constructor_parameters + - avoid_void_async + - cancel_subscriptions + - join_return_with_assignment + - literal_only_boolean_expressions + - missing_whitespace_between_adjacent_strings + - no_adjacent_strings_in_list + - no_runtimeType_toString + - prefer_const_declarations + - prefer_expression_function_bodies + - prefer_final_locals + - unnecessary_await_in_return + - unnecessary_raw_strings + - use_if_null_to_convert_nulls_to_bools + - use_raw_strings + - use_string_buffers diff --git a/pkgs/string_scanner/example/example.dart b/pkgs/string_scanner/example/example.dart new file mode 100644 index 000000000..ec9dd7660 --- /dev/null +++ b/pkgs/string_scanner/example/example.dart @@ -0,0 +1,40 @@ +// Copyright (c) 2019, the Dart project authors. Please see the AUTHORS file +// for details. All rights reserved. Use of this source code is governed by a +// BSD-style license that can be found in the LICENSE file. + +import 'dart:math' as math; + +import 'package:string_scanner/string_scanner.dart'; + +void main(List args) { + print(parseNumber(args.single)); +} + +num parseNumber(String source) { + // Scan a number ("1", "1.5", "-3"). + final scanner = StringScanner(source); + + // [Scanner.scan] tries to consume a [Pattern] and returns whether or not it + // succeeded. 
It will move the scan pointer past the end of the pattern. + final negative = scanner.scan('-'); + + // [Scanner.expect] consumes a [Pattern] and throws a [FormatError] if it + // fails. Like [Scanner.scan], it will move the scan pointer forward. + scanner.expect(RegExp(r'\d+')); + + // [Scanner.lastMatch] holds the [MatchData] for the most recent call to + // [Scanner.scan], [Scanner.expect], or [Scanner.matches]. + var number = num.parse(scanner.lastMatch![0]!); + + if (scanner.scan('.')) { + scanner.expect(RegExp(r'\d+')); + final decimal = scanner.lastMatch![0]!; + number += int.parse(decimal) / math.pow(10, decimal.length); + } + + // [Scanner.expectDone] will throw a [FormatError] if there's any input that + // hasn't yet been consumed. + scanner.expectDone(); + + return (negative ? -1 : 1) * number; +} diff --git a/pkgs/string_scanner/lib/src/charcode.dart b/pkgs/string_scanner/lib/src/charcode.dart new file mode 100644 index 000000000..d15774935 --- /dev/null +++ b/pkgs/string_scanner/lib/src/charcode.dart @@ -0,0 +1,24 @@ +// Copyright (c) 2020, the Dart project authors. Please see the AUTHORS file +// for details. All rights reserved. Use of this source code is governed by a +// BSD-style license that can be found in the LICENSE file. + +/// Character '\'. +const int $backslash = 0x5C; + +/// "Carriage return" control character. +const int $cr = 0x0D; + +/// Character '"'. +const int $doubleQuote = 0x22; + +/// Character 'f'. +const int $f = 0x66; + +/// "Line feed" control character. +const int $lf = 0x0A; + +/// Space character. +const int $space = 0x20; + +/// Character 'x'. +const int $x = 0x78; diff --git a/pkgs/string_scanner/lib/src/eager_span_scanner.dart b/pkgs/string_scanner/lib/src/eager_span_scanner.dart new file mode 100644 index 000000000..1ccc746ac --- /dev/null +++ b/pkgs/string_scanner/lib/src/eager_span_scanner.dart @@ -0,0 +1,133 @@ +// Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file +// for details. 
All rights reserved. Use of this source code is governed by a +// BSD-style license that can be found in the LICENSE file. + +import 'charcode.dart'; +import 'line_scanner.dart'; +import 'span_scanner.dart'; +import 'utils.dart'; + +// TODO(nweiz): Currently this duplicates code in line_scanner.dart. Once +// sdk#23770 is fully complete, we should move the shared code into a mixin. + +/// A regular expression matching newlines across platforms. +final _newlineRegExp = RegExp(r'\r\n?|\n'); + +/// A [SpanScanner] that tracks the line and column eagerly, like [LineScanner]. +class EagerSpanScanner extends SpanScanner { + @override + int get line => _line; + int _line = 0; + + @override + int get column => _column; + int _column = 0; + + @override + LineScannerState get state => + _EagerSpanScannerState(this, position, line, column); + + bool get _betweenCRLF => peekChar(-1) == $cr && peekChar() == $lf; + + @override + set state(LineScannerState state) { + if (state is! _EagerSpanScannerState || !identical(state._scanner, this)) { + throw ArgumentError('The given LineScannerState was not returned by ' + 'this LineScanner.'); + } + + super.position = state.position; + _line = state.line; + _column = state.column; + } + + @override + set position(int newPosition) { + final oldPosition = position; + super.position = newPosition; + + if (newPosition > oldPosition) { + final newlines = _newlinesIn(string.substring(oldPosition, newPosition)); + _line += newlines.length; + if (newlines.isEmpty) { + _column += newPosition - oldPosition; + } else { + _column = newPosition - newlines.last.end; + } + } else { + final newlines = _newlinesIn(string.substring(newPosition, oldPosition)); + if (_betweenCRLF) newlines.removeLast(); + + _line -= newlines.length; + if (newlines.isEmpty) { + _column -= oldPosition - newPosition; + } else { + _column = + newPosition - string.lastIndexOf(_newlineRegExp, newPosition) - 1; + } + } + } + + EagerSpanScanner(super.string, {super.sourceUrl, 
super.position}); + + @override + bool scanChar(int character) { + if (!super.scanChar(character)) return false; + _adjustLineAndColumn(character); + return true; + } + + @override + int readChar() { + final character = super.readChar(); + _adjustLineAndColumn(character); + return character; + } + + /// Adjusts [_line] and [_column] after having consumed [character]. + void _adjustLineAndColumn(int character) { + if (character == $lf || (character == $cr && peekChar() != $lf)) { + _line += 1; + _column = 0; + } else { + _column += inSupplementaryPlane(character) ? 2 : 1; + } + } + + @override + bool scan(Pattern pattern) { + if (!super.scan(pattern)) return false; + final firstMatch = lastMatch![0]!; + + final newlines = _newlinesIn(firstMatch); + _line += newlines.length; + if (newlines.isEmpty) { + _column += firstMatch.length; + } else { + _column = firstMatch.length - newlines.last.end; + } + + return true; + } + + /// Returns a list of [Match]es describing all the newlines in [text], which + /// is assumed to end at [position]. + List _newlinesIn(String text) { + final newlines = _newlineRegExp.allMatches(text).toList(); + if (_betweenCRLF) newlines.removeLast(); + return newlines; + } +} + +/// A class representing the state of an [EagerSpanScanner]. +class _EagerSpanScannerState implements LineScannerState { + final EagerSpanScanner _scanner; + @override + final int position; + @override + final int line; + @override + final int column; + + _EagerSpanScannerState(this._scanner, this.position, this.line, this.column); +} diff --git a/pkgs/string_scanner/lib/src/exception.dart b/pkgs/string_scanner/lib/src/exception.dart new file mode 100644 index 000000000..57af541ff --- /dev/null +++ b/pkgs/string_scanner/lib/src/exception.dart @@ -0,0 +1,21 @@ +// Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file +// for details. All rights reserved. Use of this source code is governed by a +// BSD-style license that can be found in the LICENSE file. 
+ +import 'package:source_span/source_span.dart'; + +import 'string_scanner.dart'; + +/// An exception thrown by a [StringScanner] that failed to parse a string. +class StringScannerException extends SourceSpanFormatException { + @override + String get source => super.source as String; + + /// The URL of the source file being parsed. + /// + /// This may be `null`, indicating that the source URL is unknown. + Uri? get sourceUrl => span?.sourceUrl; + + StringScannerException( + super.message, SourceSpan super.span, String super.source); +} diff --git a/pkgs/string_scanner/lib/src/line_scanner.dart b/pkgs/string_scanner/lib/src/line_scanner.dart new file mode 100644 index 000000000..b18d61057 --- /dev/null +++ b/pkgs/string_scanner/lib/src/line_scanner.dart @@ -0,0 +1,183 @@ +// Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file +// for details. All rights reserved. Use of this source code is governed by a +// BSD-style license that can be found in the LICENSE file. + +import 'charcode.dart'; +import 'string_scanner.dart'; +import 'utils.dart'; + +// Note that much of this code is duplicated in eager_span_scanner.dart. + +/// A regular expression matching newlines. A newline is either a `\n`, a `\r\n` +/// or a `\r` that is not immediately followed by a `\n`. +final _newlineRegExp = RegExp(r'\n|\r\n|\r(?!\n)'); + +/// A subclass of [StringScanner] that tracks line and column information. +class LineScanner extends StringScanner { + /// The scanner's current (zero-based) line number. + int get line => _line; + int _line = 0; + + /// The scanner's current (zero-based) column number. + int get column => _column; + int _column = 0; + + /// The scanner's state, including line and column information. + /// + /// This can be used to efficiently save and restore the state of the scanner + /// when backtracking. A given [LineScannerState] is only valid for the + /// [LineScanner] that created it. 
+ /// + /// This does not include the scanner's match information. + LineScannerState get state => + LineScannerState._(this, position, line, column); + + /// Whether the current position is between a CR character and an LF + /// character. + bool get _betweenCRLF => peekChar(-1) == $cr && peekChar() == $lf; + + set state(LineScannerState state) { + if (!identical(state._scanner, this)) { + throw ArgumentError('The given LineScannerState was not returned by ' + 'this LineScanner.'); + } + + super.position = state.position; + _line = state.line; + _column = state.column; + } + + @override + set position(int newPosition) { + if (newPosition == position) { + return; + } + + final oldPosition = position; + super.position = newPosition; + + if (newPosition == 0) { + _line = 0; + _column = 0; + } else if (newPosition > oldPosition) { + final newlines = _newlinesIn(string.substring(oldPosition, newPosition), + endPosition: newPosition); + _line += newlines.length; + if (newlines.isEmpty) { + _column += newPosition - oldPosition; + } else { + // The regex got a substring, so we need to account for where it started + // in the string. + final offsetOfLastNewline = oldPosition + newlines.last.end; + _column = newPosition - offsetOfLastNewline; + } + } else if (newPosition < oldPosition) { + final newlines = _newlinesIn(string.substring(newPosition, oldPosition), + endPosition: oldPosition); + + _line -= newlines.length; + if (newlines.isEmpty) { + _column -= oldPosition - newPosition; + } else { + // To compute the new column, we need to locate the last newline before + // the new position. When searching, we must exclude the CR if we're + // between a CRLF because it's not considered a newline. + final crOffset = _betweenCRLF ? -1 : 0; + // Additionally, if we use newPosition as the end of the search and the + // character at that position itself (the next character) is a newline + // we should not use it, so also offset to account for that. 
+ const currentCharOffset = -1; + final lastNewline = string.lastIndexOf( + _newlineRegExp, newPosition + currentCharOffset + crOffset); + + // Now we need to know the offset after the newline. This is the index + // above plus the length of the newline (e.g. if we found `\r\n` we need + // to add two). However, if no newline was found, that offset is 0. + final offsetAfterLastNewline = lastNewline == -1 + ? 0 + : string[lastNewline] == '\r' && string[lastNewline + 1] == '\n' + ? lastNewline + 2 + : lastNewline + 1; + + _column = newPosition - offsetAfterLastNewline; + } + } + } + + LineScanner(super.string, {super.sourceUrl, super.position}); + + @override + bool scanChar(int character) { + if (!super.scanChar(character)) return false; + _adjustLineAndColumn(character); + return true; + } + + @override + int readChar() { + final character = super.readChar(); + _adjustLineAndColumn(character); + return character; + } + + /// Adjusts [_line] and [_column] after having consumed [character]. + void _adjustLineAndColumn(int character) { + if (character == $lf || (character == $cr && peekChar() != $lf)) { + _line += 1; + _column = 0; + } else { + _column += inSupplementaryPlane(character) ? 2 : 1; + } + } + + @override + bool scan(Pattern pattern) { + if (!super.scan(pattern)) return false; + + final newlines = _newlinesIn(lastMatch![0]!, endPosition: position); + _line += newlines.length; + if (newlines.isEmpty) { + _column += lastMatch![0]!.length; + } else { + _column = lastMatch![0]!.length - newlines.last.end; + } + + return true; + } + + /// Returns a list of [Match]es describing all the newlines in [text], which + /// ends at [endPosition]. + /// + /// If [text] ends with `\r`, it will only be treated as a newline if the next + /// character at [endPosition] is not a `\n`. 
+ List _newlinesIn(String text, {required int endPosition}) { + final newlines = _newlineRegExp.allMatches(text).toList(); + // If the last character is a `\r` it will have been treated as a newline, + // but this is only valid if the next character is not a `\n`. + if (endPosition < string.length && + text.endsWith('\r') && + string[endPosition] == '\n') { + // newlines should never be empty here, because if `text` ends with `\r` + // it would have matched `\r(?!\n)` in the newline regex. + newlines.removeLast(); + } + return newlines; + } +} + +/// A class representing the state of a [LineScanner]. +class LineScannerState { + /// The [LineScanner] that created this. + final LineScanner _scanner; + + /// The position of the scanner in this state. + final int position; + + /// The zero-based line number of the scanner in this state. + final int line; + + /// The zero-based column number of the scanner in this state. + final int column; + + LineScannerState._(this._scanner, this.position, this.line, this.column); +} diff --git a/pkgs/string_scanner/lib/src/relative_span_scanner.dart b/pkgs/string_scanner/lib/src/relative_span_scanner.dart new file mode 100644 index 000000000..cd9af0e03 --- /dev/null +++ b/pkgs/string_scanner/lib/src/relative_span_scanner.dart @@ -0,0 +1,132 @@ +// Copyright (c) 2016, the Dart project authors. Please see the AUTHORS file +// for details. All rights reserved. Use of this source code is governed by a +// BSD-style license that can be found in the LICENSE file. + +import 'package:source_span/source_span.dart'; + +import 'exception.dart'; +import 'line_scanner.dart'; +import 'span_scanner.dart'; +import 'string_scanner.dart'; +import 'utils.dart'; + +/// A [SpanScanner] that scans within an existing [FileSpan]. 
+/// +/// This re-implements chunks of [SpanScanner] rather than using a dummy span or +/// inheritance because scanning is often a performance-critical operation, so +/// it's important to avoid adding extra overhead when relative scanning isn't +/// needed. +class RelativeSpanScanner extends StringScanner implements SpanScanner { + /// The source of the scanner. + /// + /// This caches line break information and is used to generate [SourceSpan]s. + final SourceFile _sourceFile; + + /// The start location of the span within which this scanner is scanning. + /// + /// This is used to convert between span-relative and file-relative fields. + final FileLocation _startLocation; + + @override + int get line => + _sourceFile.getLine(_startLocation.offset + position) - + _startLocation.line; + + @override + int get column { + final line = _sourceFile.getLine(_startLocation.offset + position); + final column = + _sourceFile.getColumn(_startLocation.offset + position, line: line); + return line == _startLocation.line + ? column - _startLocation.column + : column; + } + + @override + LineScannerState get state => _SpanScannerState(this, position); + + @override + set state(LineScannerState state) { + if (state is! _SpanScannerState || !identical(state._scanner, this)) { + throw ArgumentError('The given LineScannerState was not returned by ' + 'this LineScanner.'); + } + + position = state.position; + } + + @override + FileSpan? get lastSpan => _lastSpan; + FileSpan? _lastSpan; + + @override + FileLocation get location => + _sourceFile.location(_startLocation.offset + position); + + @override + FileSpan get emptySpan => location.pointSpan(); + + RelativeSpanScanner(FileSpan span) + : _sourceFile = span.file, + _startLocation = span.start, + super(span.text, sourceUrl: span.sourceUrl); + + @override + FileSpan spanFrom(LineScannerState startState, [LineScannerState? endState]) { + final endPosition = endState == null ? 
position : endState.position; + return _sourceFile.span(_startLocation.offset + startState.position, + _startLocation.offset + endPosition); + } + + @override + FileSpan spanFromPosition(int startPosition, [int? endPosition]) { + RangeError.checkValidRange( + startPosition, + endPosition, + _sourceFile.length - _startLocation.offset, + 'startPosition', + 'endPosition'); + return _sourceFile.span(_startLocation.offset + startPosition, + _startLocation.offset + (endPosition ?? position)); + } + + @override + bool matches(Pattern pattern) { + if (!super.matches(pattern)) { + _lastSpan = null; + return false; + } + + _lastSpan = _sourceFile.span(_startLocation.offset + position, + _startLocation.offset + lastMatch!.end); + return true; + } + + @override + Never error(String message, {Match? match, int? position, int? length}) { + validateErrorArgs(string, match, position, length); + + if (match == null && position == null && length == null) match = lastMatch; + position ??= match == null ? this.position : match.start; + length ??= match == null ? 1 : match.end - match.start; + + final span = _sourceFile.span(_startLocation.offset + position, + _startLocation.offset + position + length); + throw StringScannerException(message, span, string); + } +} + +/// A class representing the state of a [SpanScanner]. +class _SpanScannerState implements LineScannerState { + /// The [SpanScanner] that created this. + final RelativeSpanScanner _scanner; + + @override + final int position; + @override + int get line => _scanner._sourceFile.getLine(position); + @override + int get column => _scanner._sourceFile.getColumn(position); + + _SpanScannerState(this._scanner, this.position); +} diff --git a/pkgs/string_scanner/lib/src/span_scanner.dart b/pkgs/string_scanner/lib/src/span_scanner.dart new file mode 100644 index 000000000..509cf6004 --- /dev/null +++ b/pkgs/string_scanner/lib/src/span_scanner.dart @@ -0,0 +1,142 @@ +// Copyright (c) 2014, the Dart project authors. 
/// A subclass of [LineScanner] that exposes matched ranges as source map
/// [FileSpan]s.
class SpanScanner extends StringScanner implements LineScanner {
  /// The file built from the scanned string.
  ///
  /// Line and column lookups and every [FileSpan] this scanner produces come
  /// from this object.
  final SourceFile _sourceFile;

  /// The span recorded by the most recent successful call to [matches].
  FileSpan? _lastSpan;

  @override
  int get line => _sourceFile.getLine(position);

  @override
  int get column => _sourceFile.getColumn(position);

  @override
  LineScannerState get state => _SpanScannerState(this, position);

  /// Restores a state previously obtained from this scanner's [state] getter.
  ///
  /// Throws an [ArgumentError] if [state] was not created by this scanner.
  @override
  set state(LineScannerState state) {
    final ownedByThis =
        state is _SpanScannerState && identical(state._scanner, this);
    if (!ownedByThis) {
      throw ArgumentError('The given LineScannerState was not returned by '
          'this LineScanner.');
    }

    position = state.position;
  }

  /// The [FileSpan] for [lastMatch], or `null` if there is no last match.
  ///
  /// This is the span for the entire match. There's no way to get spans for
  /// subgroups since [Match] exposes no information about their positions.
  FileSpan? get lastSpan {
    if (lastMatch != null) return _lastSpan;
    // The match has been invalidated, so drop the cached span as well.
    return _lastSpan = null;
  }

  /// The current location of the scanner.
  FileLocation get location => _sourceFile.location(position);

  /// Returns an empty span at the current location.
  FileSpan get emptySpan => location.pointSpan();

  /// Creates a new [SpanScanner] that starts scanning from [position].
  ///
  /// [sourceUrl] is used as [SourceLocation.sourceUrl] for the returned
  /// [FileSpan]s as well as for error reporting. It can be a [String], a
  /// [Uri], or `null`.
  SpanScanner(super.string, {super.sourceUrl, super.position})
      : _sourceFile = SourceFile.fromString(string, url: sourceUrl);

  /// Creates a new [SpanScanner] that eagerly computes line and column numbers.
  ///
  /// In general [SpanScanner.new] will be more efficient, since it avoids extra
  /// computation on every scan. However, eager scanning can be useful for
  /// situations where the normal course of parsing frequently involves
  /// accessing the current line and column numbers.
  ///
  /// Note that *only* the `line` and `column` fields on the `SpanScanner`
  /// itself and its `LineScannerState` are eagerly computed. To limit their
  /// memory footprint, returned spans and locations will still lazily compute
  /// their line and column numbers.
  factory SpanScanner.eager(String string, {sourceUrl, int? position}) =
      EagerSpanScanner;

  /// Creates a new [SpanScanner] that scans within [span].
  ///
  /// This scans through [span]`.text`, but emits new spans from [span]`.file`
  /// in their appropriate relative positions. The [string] field contains only
  /// [span]`.text`, and [position], [line], and [column] are all relative to
  /// the span.
  factory SpanScanner.within(FileSpan span) = RelativeSpanScanner;

  /// Creates a [FileSpan] representing the source range between [startState]
  /// and [endState], or the current position if [endState] is null.
  FileSpan spanFrom(LineScannerState startState, [LineScannerState? endState]) {
    final endOffset = endState?.position ?? position;
    return _sourceFile.span(startState.position, endOffset);
  }

  /// Creates a [FileSpan] representing the source range between [startPosition]
  /// and [endPosition], or the current position if [endPosition] is null.
  ///
  /// Each position should be a code unit offset into the string being scanned,
  /// with the same conventions as [StringScanner.position].
  ///
  /// Throws a [RangeError] if [startPosition] or [endPosition] aren't within
  /// this source file.
  FileSpan spanFromPosition(int startPosition, [int? endPosition]) {
    return _sourceFile.span(startPosition, endPosition ?? position);
  }

  /// Returns whether [pattern] matches at the current position, recording the
  /// matched range as [lastSpan] on success and clearing it on failure.
  @override
  bool matches(Pattern pattern) {
    if (super.matches(pattern)) {
      _lastSpan = _sourceFile.span(position, lastMatch!.end);
      return true;
    }

    _lastSpan = null;
    return false;
  }

  /// Throws a [StringScannerException] with [message], located by a span
  /// taken from this scanner's source file.
  ///
  /// With no arguments the error covers [lastMatch]; if there is no match
  /// information, the span starts at [position] (or the current position) and
  /// [length] defaults to 0.
  @override
  Never error(String message, {Match? match, int? position, int? length}) {
    validateErrorArgs(string, match, position, length);

    // Fall back to the last match only when the caller gave no location.
    final usesDefaults = position == null && length == null;
    final anchor = usesDefaults ? (match ?? lastMatch) : match;
    final start = position ?? anchor?.start ?? this.position;
    final extent = length ?? (anchor == null ? 0 : anchor.end - anchor.start);

    final span = _sourceFile.span(start, start + extent);
    throw StringScannerException(message, span, string);
  }
}

/// A class representing the state of a [SpanScanner].
class _SpanScannerState implements LineScannerState {
  /// The [SpanScanner] that created this.
  final SpanScanner _scanner;

  @override
  final int position;

  _SpanScannerState(this._scanner, this.position);

  @override
  int get line => _scanner._sourceFile.getLine(position);

  @override
  int get column => _scanner._sourceFile.getColumn(position);
}
/// A class that scans through a string using [Pattern]s.
class StringScanner {
  /// The URL of the source of the string being scanned.
  ///
  /// This is used for error reporting. It may be `null`, indicating that the
  /// source URL is unknown or unavailable.
  final Uri? sourceUrl;

  /// The string being scanned through.
  final String string;

  /// Backing field for [position].
  int _position = 0;

  /// The most recent match, valid only while [_position] equals
  /// [_lastMatchPosition].
  Match? _lastMatch;

  /// The value of [_position] at which [_lastMatch] was recorded.
  int? _lastMatchPosition;

  /// The current position of the scanner in the string, in code units.
  ///
  /// Setting this clears [lastMatch]. Throws an [ArgumentError] if the new
  /// value is negative or greater than the length of [string].
  int get position => _position;
  set position(int newPosition) {
    if (newPosition.isNegative || newPosition > string.length) {
      throw ArgumentError('Invalid position $newPosition');
    }

    _position = newPosition;
    _lastMatch = null;
  }

  /// The data about the previous match made by the scanner.
  ///
  /// If the last match failed, this will be `null`.
  Match? get lastMatch {
    // [_lastMatch] is invalidated lazily so that the character-by-character
    // methods used in core loops don't pay for an extra assignment.
    if (_position == _lastMatchPosition) return _lastMatch;
    return _lastMatch = null;
  }

  /// The portion of the string that hasn't yet been scanned.
  String get rest {
    return string.substring(position);
  }

  /// Whether the scanner has completely consumed [string].
  bool get isDone {
    return position == string.length;
  }

  /// Creates a new [StringScanner] that starts scanning from [position].
  ///
  /// [position] defaults to 0, the beginning of the string. [sourceUrl] is the
  /// URL of the source of the string being scanned, if available. It can be
  /// a [String], a [Uri], or `null`.
  StringScanner(this.string, {Object? sourceUrl, int? position})
      : sourceUrl = _toUri(sourceUrl) {
    if (position != null) this.position = position;
  }

  /// Converts the constructor's loosely-typed URL argument into a [Uri].
  static Uri? _toUri(Object? url) {
    if (url == null) return null;
    if (url is String) return Uri.parse(url);
    return url as Uri;
  }

  /// Consumes a single code unit and returns it.
  ///
  /// Throws a [StringScannerException] if the string has been fully consumed.
  int readChar() {
    if (isDone) _fail('more input');
    final index = _position++;
    return string.codeUnitAt(index);
  }

  /// Returns the code unit [offset] away from [position].
  ///
  /// [offset] defaults to zero, and may be negative to inspect already-consumed
  /// characters.
  ///
  /// This returns `null` if [offset] points outside the string. It doesn't
  /// move the scanner.
  int? peekChar([int? offset]) {
    final target = position + (offset ?? 0);
    final inBounds = target >= 0 && target < string.length;
    return inBounds ? string.codeUnitAt(target) : null;
  }

  /// If the next character in the string is [character], consumes it.
  ///
  /// If [character] is a Unicode code point in a supplementary plane, this will
  /// consume two code units. Dart's string representation is UTF-16, which
  /// represents supplementary-plane code points as two code units.
  ///
  /// Returns whether or not [character] was consumed.
  bool scanChar(int character) {
    if (!inSupplementaryPlane(character)) {
      if (isDone) return false;
      if (string.codeUnitAt(_position) != character) return false;
      _position++;
      return true;
    }

    // A supplementary-plane code point needs both halves of its surrogate
    // pair to be present at the current position.
    if (_position + 1 >= string.length) return false;
    if (string.codeUnitAt(_position) != highSurrogate(character)) return false;
    if (string.codeUnitAt(_position + 1) != lowSurrogate(character)) {
      return false;
    }

    _position += 2;
    return true;
  }

  /// If the next character in the string is [character], consumes it.
  ///
  /// If [character] is a Unicode code point in a supplementary plane, this will
  /// consume two code units. Dart's string representation is UTF-16, which
  /// represents supplementary-plane code points as two code units.
  ///
  /// If [character] could not be consumed, throws a [StringScannerException]
  /// describing the position of the failure. [name] is used in this error as
  /// the expected name of the character being matched; if it's `null`, the
  /// character itself is used instead.
  void expectChar(int character, {String? name}) {
    if (scanChar(character)) return;
    _fail(name ?? _quoteChar(character));
  }

  /// Returns [character] wrapped in double quotes, escaping a backslash or a
  /// double quote so the result reads as a string literal.
  static String _quoteChar(int character) {
    if (character == $backslash) return r'"\"';
    if (character == $doubleQuote) return r'"\""';
    return '"${String.fromCharCode(character)}"';
  }

  /// Consumes a single Unicode code point and returns it.
  ///
  /// This works like [readChar], except that it automatically handles UTF-16
  /// surrogate pairs. Specifically, if the next two code units form a surrogate
  /// pair, consumes them both and returns the corresponding Unicode code point.
  ///
  /// If the next two code units are not a surrogate pair, the next code unit
  /// is returned as-is, even if it's an unpaired surrogate.
  int readCodePoint() {
    final lead = readChar();
    if (isHighSurrogate(lead)) {
      final trail = peekChar();
      if (trail != null && isLowSurrogate(trail)) {
        readChar();
        return decodeSurrogatePair(lead, trail);
      }
    }
    return lead;
  }

  /// Returns the Unicode code point immediately after [position].
  ///
  /// This works like [peekChar], except that it automatically handles UTF-16
  /// surrogate pairs. Specifically, if the next two code units form a surrogate
  /// pair, returns the corresponding Unicode code point.
  ///
  /// If the next two code units are not a surrogate pair, the next code unit
  /// is returned as-is, even if it's an unpaired surrogate.
  int? peekCodePoint() {
    final lead = peekChar();
    if (lead != null && isHighSurrogate(lead)) {
      final trail = peekChar(1);
      if (trail != null && isLowSurrogate(trail)) {
        return decodeSurrogatePair(lead, trail);
      }
    }
    return lead;
  }

  /// If [pattern] matches at the current position of the string, scans forward
  /// until the end of the match.
  ///
  /// Returns whether or not [pattern] matched.
  bool scan(Pattern pattern) {
    if (!matches(pattern)) return false;

    _position = _lastMatch!.end;
    // Keep the match valid at the new position.
    _lastMatchPosition = _position;
    return true;
  }

  /// If [pattern] matches at the current position of the string, scans forward
  /// until the end of the match.
  ///
  /// If [pattern] did not match, throws a [StringScannerException] describing
  /// the position of the failure. [name] is used in this error as the expected
  /// name of the pattern being matched; if it's `null`, the pattern itself is
  /// used instead.
  void expect(Pattern pattern, {String? name}) {
    if (scan(pattern)) return;
    _fail(name ?? _describePattern(pattern));
  }

  /// Returns a readable rendering of [pattern]: `/source/` for a [RegExp],
  /// otherwise its string form in double quotes with `\` and `"` escaped.
  static String _describePattern(Pattern pattern) {
    if (pattern is RegExp) return '/${pattern.pattern}/';

    final escaped =
        pattern.toString().replaceAll(r'\', r'\\').replaceAll('"', r'\"');
    return '"$escaped"';
  }

  /// If the string has not been fully consumed, this throws a
  /// [StringScannerException].
  void expectDone() {
    if (!isDone) _fail('no more input');
  }

  /// Returns whether or not [pattern] matches at the current position of the
  /// string.
  ///
  /// This doesn't move the scan pointer forward.
  bool matches(Pattern pattern) {
    final match = pattern.matchAsPrefix(string, position);
    _lastMatch = match;
    _lastMatchPosition = _position;
    return match != null;
  }

  /// Returns the substring of [string] between [start] and [end].
  ///
  /// Unlike [String.substring], [end] defaults to [position] rather than the
  /// end of the string.
  String substring(int start, [int? end]) =>
      string.substring(start, end ?? position);

  /// Throws a [StringScannerException] with [message] as well as a detailed
  /// description of the location of the error in the string.
  ///
  /// [match] is the match information for the span of the string with which the
  /// error is associated. This should be a match returned by this scanner's
  /// [lastMatch] property. By default, the error is associated with the last
  /// match.
  ///
  /// If [position] and/or [length] are passed, they are used as the error span
  /// instead. If only [length] is passed, [position] defaults to the current
  /// position; if only [position] is passed, [length] defaults to 0.
  ///
  /// It's an error to pass [match] at the same time as [position] or [length].
  Never error(String message, {Match? match, int? position, int? length}) {
    validateErrorArgs(string, match, position, length);

    // Fall back to the last match only when the caller gave no location.
    final noExplicitRange = position == null && length == null;
    final anchor = noExplicitRange ? (match ?? lastMatch) : match;
    final start = position ?? anchor?.start ?? this.position;
    final extent = length ?? (anchor == null ? 0 : anchor.end - anchor.start);

    final file = SourceFile.fromString(string, url: sourceUrl);
    throw StringScannerException(
        message, file.span(start, start + extent), string);
  }

  // TODO(nweiz): Make this handle long lines more gracefully.
  /// Throws a [StringScannerException] describing that [name] is expected at
  /// the current position in the string.
  Never _fail(String name) =>
      error('expected $name.', position: position, length: 0);
}
/// Validates the arguments passed to [StringScanner.error].
///
/// Throws an [ArgumentError] if [match] is combined with [position] or
/// [length], and a [RangeError] if [position], [length], or their sum falls
/// outside `0..string.length`.
void validateErrorArgs(
    String string, Match? match, int? position, int? length) {
  final hasExplicitRange = position != null || length != null;
  if (match != null && hasExplicitRange) {
    throw ArgumentError("Can't pass both match and position/length.");
  }

  if (position != null && position < 0) {
    throw RangeError('position must be greater than or equal to 0.');
  }
  if (position != null && position > string.length) {
    throw RangeError('position must be less than or equal to the '
        'string length.');
  }

  if (length != null) {
    if (length < 0) {
      throw RangeError('length must be greater than or equal to 0.');
    }
    if (position != null && position + length > string.length) {
      throw RangeError('position plus length must not go beyond the end of '
          'the string.');
    }
  }
}

// See https://en.wikipedia.org/wiki/UTF-16#Code_points_from_U+010000_to_U+10FFFF
// for documentation on how UTF-16 encoding works and definitions of various
// related terms.

/// The inclusive lower bound of Unicode's supplementary plane.
const _supplementaryPlaneLowerBound = 0x10000;

/// The inclusive upper bound of Unicode's supplementary plane.
const _supplementaryPlaneUpperBound = 0x10FFFF;

/// The inclusive lower bound of the UTF-16 high surrogate block.
const _highSurrogateLowerBound = 0xD800;

/// The inclusive lower bound of the UTF-16 low surrogate block.
const _lowSurrogateLowerBound = 0xDC00;

/// The number of low bits in each code unit of a surrogate pair that goes into
/// determining which code point it encodes.
const _surrogateBits = 10;

/// A bit mask that covers the lower [_surrogateBits] of a code point, which can
/// be used to extract the value of a surrogate or the low surrogate value of a
/// code unit.
const _surrogateValueMask = (1 << _surrogateBits) - 1;

/// Returns whether [codePoint] is in the Unicode supplementary plane, and thus
/// must be represented as a surrogate pair in UTF-16.
bool inSupplementaryPlane(int codePoint) {
  if (codePoint < _supplementaryPlaneLowerBound) return false;
  return codePoint <= _supplementaryPlaneUpperBound;
}

/// Returns whether [codeUnit] is a UTF-16 high surrogate.
bool isHighSurrogate(int codeUnit) {
  // Everything above the low value bits must equal the block's lower bound.
  final blockBits = codeUnit & ~_surrogateValueMask;
  return blockBits == _highSurrogateLowerBound;
}

/// Returns whether [codeUnit] is a UTF-16 low surrogate.
bool isLowSurrogate(int codeUnit) {
  const lowBlock = _lowSurrogateLowerBound >> _surrogateBits;
  return (codeUnit >> _surrogateBits) == lowBlock;
}

/// Returns the high surrogate needed to encode the supplementary-plane
/// [codePoint].
int highSurrogate(int codePoint) {
  assert(inSupplementaryPlane(codePoint));
  final planeOffset = codePoint - _supplementaryPlaneLowerBound;
  return (planeOffset >> _surrogateBits) + _highSurrogateLowerBound;
}

/// Returns the low surrogate needed to encode the supplementary-plane
/// [codePoint].
int lowSurrogate(int codePoint) {
  assert(inSupplementaryPlane(codePoint));
  final planeOffset = codePoint - _supplementaryPlaneLowerBound;
  return (planeOffset & _surrogateValueMask) + _lowSurrogateLowerBound;
}

/// Converts a UTF-16 surrogate pair into the Unicode code point it represents.
int decodeSurrogatePair(int highSurrogate, int lowSurrogate) {
  assert(isHighSurrogate(highSurrogate));
  assert(isLowSurrogate(lowSurrogate));
  final upperBits = (highSurrogate & _surrogateValueMask) << _surrogateBits;
  final lowerBits = lowSurrogate & _surrogateValueMask;
  return _supplementaryPlaneLowerBound + (upperBits | lowerBits);
}
+library; + +export 'src/exception.dart'; +export 'src/line_scanner.dart'; +export 'src/span_scanner.dart'; +export 'src/string_scanner.dart'; diff --git a/pkgs/string_scanner/pubspec.yaml b/pkgs/string_scanner/pubspec.yaml new file mode 100644 index 000000000..9b259cf7b --- /dev/null +++ b/pkgs/string_scanner/pubspec.yaml @@ -0,0 +1,14 @@ +name: string_scanner +version: 1.4.1 +description: A class for parsing strings using a sequence of patterns. +repository: https://github.com/dart-lang/tools/tree/main/pkgs/string_scanner + +environment: + sdk: ^3.1.0 + +dependencies: + source_span: ^1.8.0 + +dev_dependencies: + dart_flutter_team_lints: ^3.0.0 + test: ^1.16.6 diff --git a/pkgs/string_scanner/test/error_test.dart b/pkgs/string_scanner/test/error_test.dart new file mode 100644 index 000000000..1f98c3243 --- /dev/null +++ b/pkgs/string_scanner/test/error_test.dart @@ -0,0 +1,143 @@ +// Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file +// for details. All rights reserved. Use of this source code is governed by a +// BSD-style license that can be found in the LICENSE file. 
/// Tests for the span that [StringScanner.error] attaches to the exception it
/// throws, and for the argument validation it performs.
void main() {
  const singleLine = 'foo bar baz';
  const threeLines = 'foo bar baz\ndo re mi\nearth fire water';

  test('defaults to the last match', () {
    final scanner = StringScanner(singleLine)
      ..expect('foo ')
      ..expect('bar');
    expect(() => scanner.error('oh no!'), throwsStringScannerException('bar'));
  });

  group('with match', () {
    test('supports an earlier match', () {
      final scanner = StringScanner(singleLine)..expect('foo ');
      final earlier = scanner.lastMatch;
      scanner.expect('bar');
      expect(() => scanner.error('oh no!', match: earlier),
          throwsStringScannerException('foo '));
    });

    test('supports a match on a previous line', () {
      final scanner = StringScanner(threeLines)
        ..expect('foo bar baz\ndo ')
        ..expect('re');
      final earlier = scanner.lastMatch;
      scanner.expect(' mi\nearth ');
      expect(() => scanner.error('oh no!', match: earlier),
          throwsStringScannerException('re'));
    });

    test('supports a multiline match', () {
      final scanner = StringScanner(threeLines)
        ..expect('foo bar ')
        ..expect('baz\ndo');
      final earlier = scanner.lastMatch;
      scanner.expect(' re mi');
      expect(() => scanner.error('oh no!', match: earlier),
          throwsStringScannerException('baz\ndo'));
    });

    test('supports a match after position', () {
      final scanner = StringScanner(singleLine)
        ..expect('foo ')
        ..expect('bar');
      final later = scanner.lastMatch;
      // Rewind so that the saved match lies ahead of the scanner.
      scanner.position = 0;
      expect(() => scanner.error('oh no!', match: later),
          throwsStringScannerException('bar'));
    });
  });

  group('with position and/or length', () {
    test('defaults to length 0', () {
      final scanner = StringScanner(singleLine)..expect('foo ');
      expect(() => scanner.error('oh no!', position: 1),
          throwsStringScannerException(''));
    });

    test('defaults to the current position', () {
      final scanner = StringScanner(singleLine)..expect('foo ');
      expect(() => scanner.error('oh no!', length: 3),
          throwsStringScannerException('bar'));
    });

    test('supports an earlier position', () {
      final scanner = StringScanner(singleLine)..expect('foo ');
      expect(() => scanner.error('oh no!', position: 1, length: 2),
          throwsStringScannerException('oo'));
    });

    test('supports a position on a previous line', () {
      final scanner = StringScanner(threeLines)
        ..expect('foo bar baz\ndo re mi\nearth');
      expect(() => scanner.error('oh no!', position: 15, length: 2),
          throwsStringScannerException('re'));
    });

    test('supports a multiline length', () {
      final scanner = StringScanner(threeLines)
        ..expect('foo bar baz\ndo re mi\nearth');
      expect(() => scanner.error('oh no!', position: 8, length: 8),
          throwsStringScannerException('baz\ndo r'));
    });

    test('supports a position after the current one', () {
      final scanner = StringScanner(singleLine);
      expect(() => scanner.error('oh no!', position: 4, length: 3),
          throwsStringScannerException('bar'));
    });

    test('supports a length of zero', () {
      final scanner = StringScanner(singleLine);
      expect(() => scanner.error('oh no!', position: 4, length: 0),
          throwsStringScannerException(''));
    });
  });

  group('argument errors', () {
    late StringScanner scanner;
    setUp(() {
      // Leave the scanner with a live [StringScanner.lastMatch].
      scanner = StringScanner(singleLine)..scan('foo');
    });

    test('if match is passed with position', () {
      expect(
          () => scanner.error('oh no!', match: scanner.lastMatch, position: 1),
          throwsArgumentError);
    });

    test('if match is passed with length', () {
      expect(() => scanner.error('oh no!', match: scanner.lastMatch, length: 1),
          throwsArgumentError);
    });

    test('if position is negative', () {
      expect(() => scanner.error('oh no!', position: -1), throwsArgumentError);
    });

    test('if position is outside the string', () {
      expect(() => scanner.error('oh no!', position: 100), throwsArgumentError);
    });

    test('if position + length is outside the string', () {
      expect(() => scanner.error('oh no!', position: 7, length: 7),
          throwsArgumentError);
    });

    test('if length is negative', () {
      expect(() => scanner.error('oh no!', length: -1), throwsArgumentError);
    });
  });
}
scanner.expect('\nb'); + expect(scanner.line, equals(2)); + expect(scanner.column, equals(1)); + }); + + test('consuming a CR not followed by LF increases the line', () { + scanner = LineScanner('foo\nbar\rbaz'); + scanner.expect('foo\nbar\r'); + expect(scanner.line, equals(2)); + expect(scanner.column, equals(0)); + + scanner.expect('b'); + expect(scanner.line, equals(2)); + expect(scanner.column, equals(1)); + }); + + test('consuming a CR at the end increases the line', () { + scanner = LineScanner('foo\nbar\r'); + scanner.expect('foo\nbar\r'); + expect(scanner.line, equals(2)); + expect(scanner.column, equals(0)); + expect(scanner.isDone, isTrue); + }); + + test('consuming a mix of CR, LF, CR+LF increases the line', () { + scanner = LineScanner('0\n1\r2\r\n3'); + scanner.expect('0\n1\r2\r\n3'); + expect(scanner.line, equals(3)); + expect(scanner.column, equals(1)); + }); + + test('scanning a zero length match between CR LF does not fail', () { + scanner.expect('foo\nbar\r'); + expect(scanner.line, equals(1)); + expect(scanner.column, equals(4)); + scanner.expect(RegExp('(?!x)')); + expect(scanner.line, equals(1)); + expect(scanner.column, equals(4)); + }); + }); + + group('readChar()', () { + test('on a non-newline character increases the column but not the line', + () { + scanner.readChar(); + expect(scanner.line, equals(0)); + expect(scanner.column, equals(1)); + }); + + test('consuming a LF resets the column and increases the line', () { + scanner.expect('foo'); + expect(scanner.line, equals(0)); + expect(scanner.column, equals(3)); + + scanner.readChar(); + expect(scanner.line, equals(1)); + expect(scanner.column, equals(0)); + }); + + test('consuming a CR LF increases the line only after the LF', () { + scanner = LineScanner('foo\r\nbar'); + scanner.expect('foo'); + expect(scanner.line, equals(0)); + expect(scanner.column, equals(3)); + + scanner.readChar(); + expect(scanner.line, equals(0)); + expect(scanner.column, equals(4)); + + scanner.readChar(); + 
expect(scanner.line, equals(1)); + expect(scanner.column, equals(0)); + }); + + test('consuming a CR not followed by a LF increases the line', () { + scanner = LineScanner('foo\nbar\rbaz'); + scanner.expect('foo\nbar'); + expect(scanner.line, equals(1)); + expect(scanner.column, equals(3)); + + scanner.readChar(); + expect(scanner.line, equals(2)); + expect(scanner.column, equals(0)); + }); + + test('consuming a CR at the end increases the line', () { + scanner = LineScanner('foo\nbar\r'); + scanner.expect('foo\nbar'); + expect(scanner.line, equals(1)); + expect(scanner.column, equals(3)); + + scanner.readChar(); + expect(scanner.line, equals(2)); + expect(scanner.column, equals(0)); + }); + + test('consuming a mix of CR, LF, CR+LF increases the line', () { + scanner = LineScanner('0\n1\r2\r\n3'); + for (var i = 0; i < scanner.string.length; i++) { + scanner.readChar(); + } + + expect(scanner.line, equals(3)); + expect(scanner.column, equals(1)); + }); + }); + + group('readCodePoint()', () { + test('on a non-newline character increases the column but not the line', + () { + scanner.readCodePoint(); + expect(scanner.line, equals(0)); + expect(scanner.column, equals(1)); + }); + + test('consuming a newline resets the column and increases the line', () { + scanner.expect('foo'); + expect(scanner.line, equals(0)); + expect(scanner.column, equals(3)); + + scanner.readCodePoint(); + expect(scanner.line, equals(1)); + expect(scanner.column, equals(0)); + }); + + test("consuming halfway through a CR LF doesn't count as a line", () { + scanner.expect('foo\nbar'); + expect(scanner.line, equals(1)); + expect(scanner.column, equals(3)); + + scanner.readCodePoint(); + expect(scanner.line, equals(1)); + expect(scanner.column, equals(4)); + + scanner.readCodePoint(); + expect(scanner.line, equals(2)); + expect(scanner.column, equals(0)); + }); + }); + + group('scanChar()', () { + test('on a non-newline character increases the column but not the line', + () { + 
scanner.scanChar($f); + expect(scanner.line, equals(0)); + expect(scanner.column, equals(1)); + }); + + test('consuming a LF resets the column and increases the line', () { + scanner.expect('foo'); + expect(scanner.line, equals(0)); + expect(scanner.column, equals(3)); + + scanner.scanChar($lf); + expect(scanner.line, equals(1)); + expect(scanner.column, equals(0)); + }); + + test('consuming a CR LF increases the line only after the LF', () { + scanner.expect('foo\nbar'); + expect(scanner.line, equals(1)); + expect(scanner.column, equals(3)); + + scanner.scanChar($cr); + expect(scanner.line, equals(1)); + expect(scanner.column, equals(4)); + + scanner.scanChar($lf); + expect(scanner.line, equals(2)); + expect(scanner.column, equals(0)); + }); + + test('consuming a CR not followed by LF increases the line', () { + scanner = LineScanner('foo\rbar'); + scanner.expect('foo'); + expect(scanner.line, equals(0)); + expect(scanner.column, equals(3)); + + scanner.scanChar($cr); + expect(scanner.line, equals(1)); + expect(scanner.column, equals(0)); + }); + + test('consuming a CR at the end increases the line', () { + scanner = LineScanner('foo\r'); + scanner.expect('foo'); + expect(scanner.line, equals(0)); + expect(scanner.column, equals(3)); + + scanner.scanChar($cr); + expect(scanner.line, equals(1)); + expect(scanner.column, equals(0)); + }); + + test('consuming a mix of CR, LF, CR+LF increases the line', () { + scanner = LineScanner('0\n1\r2\r\n3'); + for (var i = 0; i < scanner.string.length; i++) { + scanner.scanChar(scanner.string[i].codeUnits.single); + } + + expect(scanner.line, equals(3)); + expect(scanner.column, equals(1)); + }); + }); + + group('before a surrogate pair', () { + final codePoint = '\uD83D\uDC6D'.runes.first; + const highSurrogate = 0xD83D; + + late LineScanner scanner; + setUp(() { + scanner = LineScanner('foo: \uD83D\uDC6D'); + expect(scanner.scan('foo: '), isTrue); + }); + + test('readChar returns the high surrogate and moves into the pair', 
() { + expect(scanner.readChar(), equals(highSurrogate)); + expect(scanner.line, equals(0)); + expect(scanner.column, equals(6)); + expect(scanner.position, equals(6)); + }); + + test('readCodePoint returns the code unit and moves past the pair', () { + expect(scanner.readCodePoint(), equals(codePoint)); + expect(scanner.line, equals(0)); + expect(scanner.column, equals(7)); + expect(scanner.position, equals(7)); + }); + + test('scanChar with the high surrogate moves into the pair', () { + expect(scanner.scanChar(highSurrogate), isTrue); + expect(scanner.line, equals(0)); + expect(scanner.column, equals(6)); + expect(scanner.position, equals(6)); + }); + + test('scanChar with the code point moves past the pair', () { + expect(scanner.scanChar(codePoint), isTrue); + expect(scanner.line, equals(0)); + expect(scanner.column, equals(7)); + expect(scanner.position, equals(7)); + }); + + test('expectChar with the high surrogate moves into the pair', () { + scanner.expectChar(highSurrogate); + expect(scanner.line, equals(0)); + expect(scanner.column, equals(6)); + expect(scanner.position, equals(6)); + }); + + test('expectChar with the code point moves past the pair', () { + scanner.expectChar(codePoint); + expect(scanner.line, equals(0)); + expect(scanner.column, equals(7)); + expect(scanner.position, equals(7)); + }); + }); + + group('position=', () { + test('forward through LFs sets the line and column', () { + scanner = LineScanner('foo\nbar\nbaz'); + scanner.position = 9; // "foo\nbar\nb" + expect(scanner.line, equals(2)); + expect(scanner.column, equals(1)); + }); + + test('forward from non-zero character through LFs sets the line and column', + () { + scanner = LineScanner('foo\nbar\nbaz'); + scanner.expect('fo'); + scanner.position = 9; // "foo\nbar\nb" + expect(scanner.line, equals(2)); + expect(scanner.column, equals(1)); + }); + + test('forward through CR LFs sets the line and column', () { + scanner = LineScanner('foo\r\nbar\r\nbaz'); + scanner.position = 11; 
// "foo\r\nbar\r\nb" + expect(scanner.line, equals(2)); + expect(scanner.column, equals(1)); + }); + + test('forward through CR not followed by LFs sets the line and column', () { + scanner = LineScanner('foo\rbar\rbaz'); + scanner.position = 9; // "foo\rbar\rb" + expect(scanner.line, equals(2)); + expect(scanner.column, equals(1)); + }); + + test('forward through CR at end sets the line and column', () { + scanner = LineScanner('foo\rbar\r'); + scanner.position = 8; // "foo\rbar\r" + expect(scanner.line, equals(2)); + expect(scanner.column, equals(0)); + }); + + test('forward through a mix of CR, LF, CR+LF sets the line and column', () { + scanner = LineScanner('0\n1\r2\r\n3'); + scanner.position = scanner.string.length; + + expect(scanner.line, equals(3)); + expect(scanner.column, equals(1)); + }); + + test('forward through no newlines sets the column', () { + scanner.position = 2; // "fo" + expect(scanner.line, equals(0)); + expect(scanner.column, equals(2)); + }); + + test('backward through LFs sets the line and column', () { + scanner = LineScanner('foo\nbar\nbaz'); + scanner.expect('foo\nbar\nbaz'); + scanner.position = 2; // "fo" + expect(scanner.line, equals(0)); + expect(scanner.column, equals(2)); + }); + + test('backward through CR LFs sets the line and column', () { + scanner = LineScanner('foo\r\nbar\r\nbaz'); + scanner.expect('foo\r\nbar\r\nbaz'); + scanner.position = 2; // "fo" + expect(scanner.line, equals(0)); + expect(scanner.column, equals(2)); + }); + + test('backward through CR not followed by LFs sets the line and column', + () { + scanner = LineScanner('foo\rbar\rbaz'); + scanner.expect('foo\rbar\rbaz'); + scanner.position = 2; // "fo" + expect(scanner.line, equals(0)); + expect(scanner.column, equals(2)); + }); + + test('backward through CR at end sets the line and column', () { + scanner = LineScanner('foo\rbar\r'); + scanner.expect('foo\rbar\r'); + scanner.position = 2; // "fo" + expect(scanner.line, equals(0)); + expect(scanner.column, 
equals(2)); + }); + + test('backward through a mix of CR, LF, CR+LF sets the line and column', + () { + scanner = LineScanner('0\n1\r2\r\n3'); + scanner.expect(scanner.string); + + scanner.position = 1; + expect(scanner.line, equals(0)); + expect(scanner.column, equals(1)); + }); + + test('backward through no newlines sets the column', () { + scanner.expect('foo\nbar\r\nbaz'); + scanner.position = 10; // "foo\nbar\r\nb" + expect(scanner.line, equals(2)); + expect(scanner.column, equals(1)); + }); + + test("forward halfway through a CR LF doesn't count as a line", () { + scanner.position = 8; // "foo\nbar\r" + expect(scanner.line, equals(1)); + expect(scanner.column, equals(4)); + }); + + test('forward from halfway through a CR LF counts as a line', () { + scanner.expect('foo\nbar\r'); + scanner.position = 11; // "foo\nbar\r\nba" + expect(scanner.line, equals(2)); + expect(scanner.column, equals(2)); + }); + + test('backward to between CR LF', () { + scanner.expect('foo\nbar\r\nbaz'); + scanner.position = 8; // "foo\nbar\r" + expect(scanner.line, equals(1)); + expect(scanner.column, equals(4)); + }); + + test('backward from between CR LF', () { + scanner.expect('foo\nbar\r'); + expect(scanner.line, equals(1)); + expect(scanner.column, equals(4)); + scanner.position = 5; // "foo\nb" + expect(scanner.line, equals(1)); + expect(scanner.column, equals(1)); + }); + + test('backward to after CR LF', () { + scanner.expect('foo\nbar\r\nbaz'); + scanner.position = 9; // "foo\nbar\r\n" + expect(scanner.line, equals(2)); + expect(scanner.column, equals(0)); + }); + + test('backward to before CR LF', () { + scanner.expect('foo\nbar\r\nbaz'); + scanner.position = 7; // "foo\nbar" + expect(scanner.line, equals(1)); + expect(scanner.column, equals(3)); + }); + }); + + test('state= restores the line, column, and position', () { + scanner.expect('foo\nb'); + final state = scanner.state; + + scanner.scan('ar\nba'); + scanner.state = state; + expect(scanner.rest, equals('ar\r\nbaz')); 
+ expect(scanner.line, equals(1)); + expect(scanner.column, equals(1)); + }); + + test('state= rejects a foreign state', () { + scanner.scan('foo\nb'); + + expect(() => LineScanner(scanner.string).state = scanner.state, + throwsArgumentError); + }); +} diff --git a/pkgs/string_scanner/test/span_scanner_test.dart b/pkgs/string_scanner/test/span_scanner_test.dart new file mode 100644 index 000000000..93d9c477c --- /dev/null +++ b/pkgs/string_scanner/test/span_scanner_test.dart @@ -0,0 +1,238 @@ +// Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file +// for details. All rights reserved. Use of this source code is governed by a +// BSD-style license that can be found in the LICENSE file. + +import 'package:source_span/source_span.dart'; +import 'package:string_scanner/string_scanner.dart'; +import 'package:test/test.dart'; + +import 'utils.dart'; + +void main() { + testForImplementation( + 'lazy', + ([String? string]) => + SpanScanner(string ?? 'foo\nbar\nbaz', sourceUrl: 'source')); + + testForImplementation( + 'eager', + ([String? string]) => + SpanScanner.eager(string ?? 
'foo\nbar\nbaz', sourceUrl: 'source')); + + group('within', () { + const text = 'first\nbefore: foo\nbar\nbaz :after\nlast'; + final startOffset = text.indexOf('foo'); + + late SpanScanner scanner; + setUp(() { + final file = SourceFile.fromString(text, url: 'source'); + scanner = + SpanScanner.within(file.span(startOffset, text.indexOf(' :after'))); + }); + + test('string only includes the span text', () { + expect(scanner.string, equals('foo\nbar\nbaz')); + }); + + test('line and column are span-relative', () { + expect(scanner.line, equals(0)); + expect(scanner.column, equals(0)); + + scanner.scan('foo'); + expect(scanner.line, equals(0)); + expect(scanner.column, equals(3)); + + scanner.scan('\n'); + expect(scanner.line, equals(1)); + expect(scanner.column, equals(0)); + }); + + test('tracks the span for the last match', () { + scanner.scan('fo'); + scanner.scan('o\nba'); + + final span = scanner.lastSpan!; + expect(span.start.offset, equals(startOffset + 2)); + expect(span.start.line, equals(1)); + expect(span.start.column, equals(10)); + expect(span.start.sourceUrl, equals(Uri.parse('source'))); + + expect(span.end.offset, equals(startOffset + 6)); + expect(span.end.line, equals(2)); + expect(span.end.column, equals(2)); + expect(span.end.sourceUrl, equals(Uri.parse('source'))); + + expect(span.text, equals('o\nba')); + }); + + test('.spanFrom() returns a span from a previous state', () { + scanner.scan('fo'); + final state = scanner.state; + scanner.scan('o\nba'); + scanner.scan('r\nba'); + + final span = scanner.spanFrom(state); + expect(span.text, equals('o\nbar\nba')); + }); + + test('.spanFromPosition() returns a span from a previous state', () { + scanner.scan('fo'); + final start = scanner.position; + scanner.scan('o\nba'); + scanner.scan('r\nba'); + + final span = scanner.spanFromPosition(start + 2, start + 5); + expect(span.text, equals('bar')); + }); + + test('.emptySpan returns an empty span at the current location', () { + 
scanner.scan('foo\nba'); + + final span = scanner.emptySpan; + expect(span.start.offset, equals(startOffset + 6)); + expect(span.start.line, equals(2)); + expect(span.start.column, equals(2)); + expect(span.start.sourceUrl, equals(Uri.parse('source'))); + + expect(span.end.offset, equals(startOffset + 6)); + expect(span.end.line, equals(2)); + expect(span.end.column, equals(2)); + expect(span.end.sourceUrl, equals(Uri.parse('source'))); + + expect(span.text, equals('')); + }); + + test('.error() uses an absolute span', () { + scanner.expect('foo'); + expect( + () => scanner.error('oh no!'), throwsStringScannerException('foo')); + }); + + test('.isDone returns true at the end of the span', () { + scanner.expect('foo\nbar\nbaz'); + expect(scanner.isDone, isTrue); + }); + }); +} + +void testForImplementation( + String name, SpanScanner Function([String string]) create) { + group('for a $name scanner', () { + late SpanScanner scanner; + setUp(() => scanner = create()); + + test('tracks the span for the last match', () { + scanner.scan('fo'); + scanner.scan('o\nba'); + + final span = scanner.lastSpan!; + expect(span.start.offset, equals(2)); + expect(span.start.line, equals(0)); + expect(span.start.column, equals(2)); + expect(span.start.sourceUrl, equals(Uri.parse('source'))); + + expect(span.end.offset, equals(6)); + expect(span.end.line, equals(1)); + expect(span.end.column, equals(2)); + expect(span.end.sourceUrl, equals(Uri.parse('source'))); + + expect(span.text, equals('o\nba')); + }); + + test('.spanFrom() returns a span from a previous state', () { + scanner.scan('fo'); + final state = scanner.state; + scanner.scan('o\nba'); + scanner.scan('r\nba'); + + final span = scanner.spanFrom(state); + expect(span.text, equals('o\nbar\nba')); + }); + + test('.spanFromPosition() returns a span from a previous state', () { + scanner.scan('fo'); + final start = scanner.position; + scanner.scan('o\nba'); + scanner.scan('r\nba'); + + final span = 
scanner.spanFromPosition(start + 2, start + 5); + expect(span.text, equals('bar')); + }); + + test('.emptySpan returns an empty span at the current location', () { + scanner.scan('foo\nba'); + + final span = scanner.emptySpan; + expect(span.start.offset, equals(6)); + expect(span.start.line, equals(1)); + expect(span.start.column, equals(2)); + expect(span.start.sourceUrl, equals(Uri.parse('source'))); + + expect(span.end.offset, equals(6)); + expect(span.end.line, equals(1)); + expect(span.end.column, equals(2)); + expect(span.end.sourceUrl, equals(Uri.parse('source'))); + + expect(span.text, equals('')); + }); + + group('before a surrogate pair', () { + final codePoint = '\uD83D\uDC6D'.runes.first; + const highSurrogate = 0xD83D; + + late SpanScanner scanner; + setUp(() { + scanner = create('foo: \uD83D\uDC6D bar'); + expect(scanner.scan('foo: '), isTrue); + }); + + test('readChar returns the high surrogate and moves into the pair', () { + expect(scanner.readChar(), equals(highSurrogate)); + expect(scanner.line, equals(0)); + expect(scanner.column, equals(6)); + expect(scanner.position, equals(6)); + }); + + test('readCodePoint returns the code unit and moves past the pair', () { + expect(scanner.readCodePoint(), equals(codePoint)); + expect(scanner.line, equals(0)); + expect(scanner.column, equals(7)); + expect(scanner.position, equals(7)); + }); + + test('scanChar with the high surrogate moves into the pair', () { + expect(scanner.scanChar(highSurrogate), isTrue); + expect(scanner.line, equals(0)); + expect(scanner.column, equals(6)); + expect(scanner.position, equals(6)); + }); + + test('scanChar with the code point moves past the pair', () { + expect(scanner.scanChar(codePoint), isTrue); + expect(scanner.line, equals(0)); + expect(scanner.column, equals(7)); + expect(scanner.position, equals(7)); + }); + + test('expectChar with the high surrogate moves into the pair', () { + scanner.expectChar(highSurrogate); + expect(scanner.line, equals(0)); + 
expect(scanner.column, equals(6)); + expect(scanner.position, equals(6)); + }); + + test('expectChar with the code point moves past the pair', () { + scanner.expectChar(codePoint); + expect(scanner.line, equals(0)); + expect(scanner.column, equals(7)); + expect(scanner.position, equals(7)); + }); + + test('spanFrom covers the surrogate pair', () { + final state = scanner.state; + scanner.scan('\uD83D\uDC6D b'); + expect(scanner.spanFrom(state).text, equals('\uD83D\uDC6D b')); + }); + }); + }); +} diff --git a/pkgs/string_scanner/test/string_scanner_test.dart b/pkgs/string_scanner/test/string_scanner_test.dart new file mode 100644 index 000000000..36a737e36 --- /dev/null +++ b/pkgs/string_scanner/test/string_scanner_test.dart @@ -0,0 +1,564 @@ +// Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file +// for details. All rights reserved. Use of this source code is governed by a +// BSD-style license that can be found in the LICENSE file. + +import 'package:string_scanner/src/charcode.dart'; +import 'package:string_scanner/string_scanner.dart'; +import 'package:test/test.dart'; + +void main() { + group('with an empty string', () { + late StringScanner scanner; + setUp(() { + scanner = StringScanner(''); + }); + + test('is done', () { + expect(scanner.isDone, isTrue); + expect(scanner.expectDone, isNot(throwsFormatException)); + }); + + test('rest is empty', () { + expect(scanner.rest, isEmpty); + }); + + test('lastMatch is null', () { + expect(scanner.lastMatch, isNull); + }); + + test('position is zero', () { + expect(scanner.position, equals(0)); + }); + + test("readChar fails and doesn't change the state", () { + expect(scanner.readChar, throwsFormatException); + expect(scanner.lastMatch, isNull); + expect(scanner.position, equals(0)); + }); + + test("readCodePoint fails and doesn't change the state", () { + expect(scanner.readCodePoint, throwsFormatException); + expect(scanner.lastMatch, isNull); + expect(scanner.position, equals(0)); + }); + 
+ test("peekChar returns null and doesn't change the state", () { + expect(scanner.peekChar(), isNull); + expect(scanner.lastMatch, isNull); + expect(scanner.position, equals(0)); + }); + + test("peekCodePoint returns null and doesn't change the state", () { + expect(scanner.peekCodePoint(), isNull); + expect(scanner.lastMatch, isNull); + expect(scanner.position, equals(0)); + }); + + test("scanChar returns false and doesn't change the state", () { + expect(scanner.scanChar($f), isFalse); + expect(scanner.lastMatch, isNull); + expect(scanner.position, equals(0)); + }); + + test("expectChar fails and doesn't change the state", () { + expect(() => scanner.expectChar($f), throwsFormatException); + expect(scanner.lastMatch, isNull); + expect(scanner.position, equals(0)); + }); + + test("scan returns false and doesn't change the state", () { + expect(scanner.scan(RegExp('.')), isFalse); + expect(scanner.lastMatch, isNull); + expect(scanner.position, equals(0)); + }); + + test("expect throws a FormatException and doesn't change the state", () { + expect(() => scanner.expect(RegExp('.')), throwsFormatException); + expect(scanner.lastMatch, isNull); + expect(scanner.position, equals(0)); + }); + + test("matches returns false and doesn't change the state", () { + expect(scanner.matches(RegExp('.')), isFalse); + expect(scanner.lastMatch, isNull); + expect(scanner.position, equals(0)); + }); + + test('substring returns the empty string', () { + expect(scanner.substring(0), isEmpty); + }); + + test('setting position to 1 throws an ArgumentError', () { + expect(() { + scanner.position = 1; + }, throwsArgumentError); + }); + + test('setting position to -1 throws an ArgumentError', () { + expect(() { + scanner.position = -1; + }, throwsArgumentError); + }); + }); + + group('at the beginning of a string', () { + late StringScanner scanner; + setUp(() { + scanner = StringScanner('foo bar'); + }); + + test('is not done', () { + expect(scanner.isDone, isFalse); + 
expect(scanner.expectDone, throwsFormatException); + }); + + test('rest is the whole string', () { + expect(scanner.rest, equals('foo bar')); + }); + + test('lastMatch is null', () { + expect(scanner.lastMatch, isNull); + }); + + test('position is zero', () { + expect(scanner.position, equals(0)); + }); + + test('readChar returns the first character and moves forward', () { + expect(scanner.readChar(), equals(0x66)); + expect(scanner.lastMatch, isNull); + expect(scanner.position, equals(1)); + }); + + test('readCodePoint returns the first character and moves forward', () { + expect(scanner.readCodePoint(), equals(0x66)); + expect(scanner.lastMatch, isNull); + expect(scanner.position, equals(1)); + }); + + test('peekChar returns the first character', () { + expect(scanner.peekChar(), equals(0x66)); + expect(scanner.lastMatch, isNull); + expect(scanner.position, equals(0)); + }); + + test('peekChar with an argument returns the nth character', () { + expect(scanner.peekChar(4), equals(0x62)); + expect(scanner.lastMatch, isNull); + expect(scanner.position, equals(0)); + }); + + test('peekCodePoint returns the first character', () { + expect(scanner.peekCodePoint(), equals(0x66)); + expect(scanner.lastMatch, isNull); + expect(scanner.position, equals(0)); + }); + + test('a matching scanChar returns true moves forward', () { + expect(scanner.scanChar($f), isTrue); + expect(scanner.lastMatch, isNull); + expect(scanner.position, equals(1)); + }); + + test('a non-matching scanChar returns false and does nothing', () { + expect(scanner.scanChar($x), isFalse); + expect(scanner.lastMatch, isNull); + expect(scanner.position, equals(0)); + }); + + test('a matching expectChar moves forward', () { + scanner.expectChar($f); + expect(scanner.lastMatch, isNull); + expect(scanner.position, equals(1)); + }); + + test('a non-matching expectChar fails', () { + expect(() => scanner.expectChar($x), throwsFormatException); + expect(scanner.lastMatch, isNull); + expect(scanner.position, 
equals(0)); + }); + + test('a matching scan returns true and changes the state', () { + expect(scanner.scan(RegExp('f(..)')), isTrue); + expect(scanner.lastMatch![1], equals('oo')); + expect(scanner.position, equals(3)); + expect(scanner.rest, equals(' bar')); + }); + + test('a non-matching scan returns false and sets lastMatch to null', () { + expect(scanner.matches(RegExp('f(..)')), isTrue); + expect(scanner.lastMatch, isNotNull); + + expect(scanner.scan(RegExp('b(..)')), isFalse); + expect(scanner.lastMatch, isNull); + expect(scanner.position, equals(0)); + expect(scanner.rest, equals('foo bar')); + }); + + test('a matching expect changes the state', () { + scanner.expect(RegExp('f(..)')); + expect(scanner.lastMatch![1], equals('oo')); + expect(scanner.position, equals(3)); + expect(scanner.rest, equals(' bar')); + }); + + test( + 'a non-matching expect throws a FormatException and sets lastMatch to ' + 'null', () { + expect(scanner.matches(RegExp('f(..)')), isTrue); + expect(scanner.lastMatch, isNotNull); + + expect(() => scanner.expect(RegExp('b(..)')), throwsFormatException); + expect(scanner.lastMatch, isNull); + expect(scanner.position, equals(0)); + expect(scanner.rest, equals('foo bar')); + }); + + test('a matching matches returns true and only changes lastMatch', () { + expect(scanner.matches(RegExp('f(..)')), isTrue); + expect(scanner.lastMatch![1], equals('oo')); + expect(scanner.position, equals(0)); + expect(scanner.rest, equals('foo bar')); + }); + + test("a non-matching matches returns false and doesn't change the state", + () { + expect(scanner.matches(RegExp('b(..)')), isFalse); + expect(scanner.lastMatch, isNull); + expect(scanner.position, equals(0)); + expect(scanner.rest, equals('foo bar')); + }); + + test('substring from the beginning returns the empty string', () { + expect(scanner.substring(0), isEmpty); + }); + + test('substring with a custom end returns the substring', () { + expect(scanner.substring(0, 3), equals('foo')); + }); + + 
test('substring with the string length returns the whole string', () { + expect(scanner.substring(0, 7), equals('foo bar')); + }); + + test('setting position to 1 moves the cursor forward', () { + scanner.position = 1; + expect(scanner.position, equals(1)); + expect(scanner.rest, equals('oo bar')); + + expect(scanner.scan(RegExp('oo.')), isTrue); + expect(scanner.lastMatch![0], equals('oo ')); + expect(scanner.position, equals(4)); + expect(scanner.rest, equals('bar')); + }); + + test('setting position beyond the string throws an ArgumentError', () { + expect(() { + scanner.position = 8; + }, throwsArgumentError); + }); + + test('setting position to -1 throws an ArgumentError', () { + expect(() { + scanner.position = -1; + }, throwsArgumentError); + }); + + test('scan accepts any Pattern', () { + expect(scanner.scan('foo'), isTrue); + expect(scanner.lastMatch![0], equals('foo')); + expect(scanner.position, equals(3)); + expect(scanner.rest, equals(' bar')); + }); + + test('scans multiple times', () { + expect(scanner.scan(RegExp('f(..)')), isTrue); + expect(scanner.lastMatch![1], equals('oo')); + expect(scanner.position, equals(3)); + expect(scanner.rest, equals(' bar')); + + expect(scanner.scan(RegExp(' b(..)')), isTrue); + expect(scanner.lastMatch![1], equals('ar')); + expect(scanner.position, equals(7)); + expect(scanner.rest, equals('')); + expect(scanner.isDone, isTrue); + expect(scanner.expectDone, isNot(throwsFormatException)); + }); + }); + + group('after a scan', () { + late StringScanner scanner; + setUp(() { + scanner = StringScanner('foo bar'); + expect(scanner.scan('foo'), isTrue); + }); + + test('readChar returns the first character and unsets the last match', () { + expect(scanner.readChar(), equals($space)); + expect(scanner.lastMatch, isNull); + expect(scanner.position, equals(4)); + }); + + test('readCodePoint returns the first character and unsets the last match', + () { + expect(scanner.readCodePoint(), equals($space)); + 
expect(scanner.lastMatch, isNull); + expect(scanner.position, equals(4)); + }); + + test('a matching scanChar returns true and unsets the last match', () { + expect(scanner.scanChar($space), isTrue); + expect(scanner.lastMatch, isNull); + expect(scanner.position, equals(4)); + }); + + test('a matching expectChar succeeds and unsets the last match', () { + scanner.expectChar($space); + expect(scanner.lastMatch, isNull); + expect(scanner.position, equals(4)); + }); + }); + + group('at the end of a string', () { + late StringScanner scanner; + setUp(() { + scanner = StringScanner('foo bar'); + expect(scanner.scan('foo bar'), isTrue); + }); + + test('is done', () { + expect(scanner.isDone, isTrue); + expect(scanner.expectDone, isNot(throwsFormatException)); + }); + + test('rest is empty', () { + expect(scanner.rest, isEmpty); + }); + + test('position is the string length', () { + expect(scanner.position, equals(7)); + }); + + test("readChar fails and doesn't change the state", () { + expect(scanner.readChar, throwsFormatException); + expect(scanner.lastMatch, isNotNull); + expect(scanner.position, equals(7)); + }); + + test("readCodePoint fails and doesn't change the state", () { + expect(scanner.readCodePoint, throwsFormatException); + expect(scanner.lastMatch, isNotNull); + expect(scanner.position, equals(7)); + }); + + test("peekChar returns null and doesn't change the state", () { + expect(scanner.peekChar(), isNull); + expect(scanner.lastMatch, isNotNull); + expect(scanner.position, equals(7)); + }); + + test("peekCodePoint returns null and doesn't change the state", () { + expect(scanner.peekCodePoint(), isNull); + expect(scanner.lastMatch, isNotNull); + expect(scanner.position, equals(7)); + }); + + test("scanChar returns false and doesn't change the state", () { + expect(scanner.scanChar($f), isFalse); + expect(scanner.lastMatch, isNotNull); + expect(scanner.position, equals(7)); + }); + + test("expectChar fails and doesn't change the state", () { + expect(() => 
scanner.expectChar($f), throwsFormatException); + expect(scanner.lastMatch, isNotNull); + expect(scanner.position, equals(7)); + }); + + test('scan returns false and sets lastMatch to null', () { + expect(scanner.scan(RegExp('.')), isFalse); + expect(scanner.lastMatch, isNull); + expect(scanner.position, equals(7)); + }); + + test('expect throws a FormatException and sets lastMatch to null', () { + expect(() => scanner.expect(RegExp('.')), throwsFormatException); + expect(scanner.lastMatch, isNull); + expect(scanner.position, equals(7)); + }); + + test('matches returns false sets lastMatch to null', () { + expect(scanner.matches(RegExp('.')), isFalse); + expect(scanner.lastMatch, isNull); + expect(scanner.position, equals(7)); + }); + + test('substring from the beginning returns the whole string', () { + expect(scanner.substring(0), equals('foo bar')); + }); + + test('substring with a custom start returns a substring from there', () { + expect(scanner.substring(4), equals('bar')); + }); + + test('substring with a custom start and end returns that substring', () { + expect(scanner.substring(3, 5), equals(' b')); + }); + + test('setting position to 1 moves the cursor backward', () { + scanner.position = 1; + expect(scanner.position, equals(1)); + expect(scanner.rest, equals('oo bar')); + + expect(scanner.scan(RegExp('oo.')), isTrue); + expect(scanner.lastMatch![0], equals('oo ')); + expect(scanner.position, equals(4)); + expect(scanner.rest, equals('bar')); + }); + + test('setting and resetting position clears lastMatch', () { + final oldPosition = scanner.position; + scanner.position = 1; + scanner.position = oldPosition; + expect(scanner.lastMatch, isNull); + }); + + test('setting position beyond the string throws an ArgumentError', () { + expect(() { + scanner.position = 8; + }, throwsArgumentError); + }); + + test('setting position to -1 throws an ArgumentError', () { + expect(() { + scanner.position = -1; + }, throwsArgumentError); + }); + }); + + group('before 
a surrogate pair', () { + final codePoint = '\uD83D\uDC6D'.runes.first; + const highSurrogate = 0xD83D; + + late StringScanner scanner; + setUp(() { + scanner = StringScanner('foo: \uD83D\uDC6D'); + expect(scanner.scan('foo: '), isTrue); + }); + + test('readChar returns the high surrogate and moves into the pair', () { + expect(scanner.readChar(), equals(highSurrogate)); + expect(scanner.position, equals(6)); + }); + + test('readCodePoint returns the code unit and moves past the pair', () { + expect(scanner.readCodePoint(), equals(codePoint)); + expect(scanner.position, equals(7)); + }); + + test('peekChar returns the high surrogate', () { + expect(scanner.peekChar(), equals(highSurrogate)); + expect(scanner.position, equals(5)); + }); + + test('peekCodePoint returns the code unit', () { + expect(scanner.peekCodePoint(), equals(codePoint)); + expect(scanner.position, equals(5)); + }); + + test('scanChar with the high surrogate moves into the pair', () { + expect(scanner.scanChar(highSurrogate), isTrue); + expect(scanner.position, equals(6)); + }); + + test('scanChar with the code point moves past the pair', () { + expect(scanner.scanChar(codePoint), isTrue); + expect(scanner.position, equals(7)); + }); + + test('expectChar with the high surrogate moves into the pair', () { + scanner.expectChar(highSurrogate); + expect(scanner.position, equals(6)); + }); + + test('expectChar with the code point moves past the pair', () { + scanner.expectChar(codePoint); + expect(scanner.position, equals(7)); + }); + }); + + group('before an invalid surrogate pair', () { + // This surrogate pair is invalid because U+E000 is just outside the range + // of low surrogates. If it were interpreted as a surrogate pair anyway, the + // value would be U+110000, which is outside of the Unicode gamut. 
+ const codePoint = 0x110000; + const highSurrogate = 0xD800; + + late StringScanner scanner; + setUp(() { + scanner = StringScanner('foo: \uD800\uE000'); + expect(scanner.scan('foo: '), isTrue); + }); + + test('readChar returns the high surrogate and moves into the pair', () { + expect(scanner.readChar(), equals(highSurrogate)); + expect(scanner.position, equals(6)); + }); + + test('readCodePoint returns the high surrogate and moves into the pair', + () { + expect(scanner.readCodePoint(), equals(highSurrogate)); + expect(scanner.position, equals(6)); + }); + + test('peekChar returns the high surrogate', () { + expect(scanner.peekChar(), equals(highSurrogate)); + expect(scanner.position, equals(5)); + }); + + test('peekCodePoint returns the high surrogate', () { + expect(scanner.peekCodePoint(), equals(highSurrogate)); + expect(scanner.position, equals(5)); + }); + + test('scanChar with the high surrogate moves into the pair', () { + expect(scanner.scanChar(highSurrogate), isTrue); + expect(scanner.position, equals(6)); + }); + + test('scanChar with the fake code point returns false', () { + expect(scanner.scanChar(codePoint), isFalse); + expect(scanner.position, equals(5)); + }); + + test('expectChar with the high surrogate moves into the pair', () { + scanner.expectChar(highSurrogate); + expect(scanner.position, equals(6)); + }); + + test('expectChar with the fake code point fails', () { + expect(() => scanner.expectChar(codePoint), throwsRangeError); + }); + }); + + group('a scanner constructed with a custom position', () { + test('starts scanning from that position', () { + final scanner = StringScanner('foo bar', position: 1); + expect(scanner.position, equals(1)); + expect(scanner.rest, equals('oo bar')); + + expect(scanner.scan(RegExp('oo.')), isTrue); + expect(scanner.lastMatch![0], equals('oo ')); + expect(scanner.position, equals(4)); + expect(scanner.rest, equals('bar')); + }); + + test('throws an ArgumentError if the position is -1', () { + expect(() => 
StringScanner('foo bar', position: -1), throwsArgumentError); + }); + + test('throws an ArgumentError if the position is beyond the string', () { + expect(() => StringScanner('foo bar', position: 8), throwsArgumentError); + }); + }); +} diff --git a/pkgs/string_scanner/test/utils.dart b/pkgs/string_scanner/test/utils.dart new file mode 100644 index 000000000..ca03c064f --- /dev/null +++ b/pkgs/string_scanner/test/utils.dart @@ -0,0 +1,12 @@ +// Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file +// for details. All rights reserved. Use of this source code is governed by a +// BSD-style license that can be found in the LICENSE file. + +import 'package:string_scanner/string_scanner.dart'; +import 'package:test/test.dart'; + +/// Returns a matcher that asserts that a closure throws a +/// [StringScannerException] whose span text equals [text]. +Matcher throwsStringScannerException(String text) => + throwsA(const TypeMatcher<StringScannerException>() + .having((e) => e.span!.text, 'span.text', text));