Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize tokenizer #438

Merged
merged 10 commits into from
Aug 14, 2024
4 changes: 2 additions & 2 deletions .vscode/tasks.json
Original file line number Diff line number Diff line change
Expand Up @@ -101,10 +101,10 @@
"background": {
"activeOnStart": true,
"beginsPattern": {
"regexp": "^Type-checking in progress|[Cc]ompiled (?:.*?successfully|with .*?(?:error|warning))"
"regexp": "^asset|cached"
},
"endsPattern": {
"regexp": "^Found \\d+ (?:error|warning)|^No errors found"
"regexp": "[Cc]ompiled (?:.*?successfully|with .*?(?:error|warning))"
}
}
},
Expand Down
16 changes: 6 additions & 10 deletions src/color.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
// Color conversion methods for Color provider
import { CancellationToken, Color, ColorInformation, ColorPresentation, ProviderResult, Range, TextDocument, TextEdit, languages } from "vscode";
import { CancellationToken, Color, ColorInformation, ColorPresentation, DocumentColorProvider, Range, TextDocument, TextEdit, languages } from "vscode";
import { ValueEqualsSet } from "./utilities/hashset";
import { Tokenizer } from "./tokenizer/tokenizer";
import { LiteralTokenType } from "./tokenizer/renpy-tokens";
Expand All @@ -11,26 +11,22 @@ export type DocumentColorContext = {
};

export const colorProvider = languages.registerColorProvider("renpy", {
provideDocumentColors(document: TextDocument, token: CancellationToken): ProviderResult<ColorInformation[]> {
provideDocumentColors(document: TextDocument, token: CancellationToken) {
if (token.isCancellationRequested) {
return;
}

return new Promise((resolve) => {
resolve(getColorInformation(document));
});
return Promise.resolve(getColorInformation(document));
},

provideColorPresentations(color: Color, context: DocumentColorContext, token: CancellationToken): ProviderResult<ColorPresentation[]> {
provideColorPresentations(color: Color, context: DocumentColorContext, token: CancellationToken) {
if (token.isCancellationRequested) {
return;
}

return new Promise((resolve) => {
resolve(getColorPresentations(color, context));
});
return Promise.resolve(getColorPresentations(color, context));
},
});
} as DocumentColorProvider);

/**
* Finds all colors in the given document and returns their ranges and color
Expand Down
54 changes: 23 additions & 31 deletions src/tokenizer/python-token-patterns.g.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
// THIS FILE HAS BEEN GENERATED BY THE `syntax-to-token-pattern.py` GENERATOR
// DO NOT EDIT THIS FILE DIRECTLY! INSTEAD RUN THE PYTHON SCRIPT.
// ANY MANUAL EDITS MADE TO THIS FILE WILL BE OVERWRITTEN. YOU HAVE BEEN WARNED.
// Last generated: 14/08/2024 09:13:39 (UTC+0)
// Last generated: 14/08/2024 16:34:44 (UTC+0)

import { MetaTokenType, KeywordTokenType, CharacterTokenType, EntityTokenType, OperatorTokenType, LiteralTokenType } from "./renpy-tokens";
import { TokenPattern } from "./token-pattern-types";
Expand Down Expand Up @@ -62,7 +62,7 @@ export const comments: TokenPattern = {
debugName: "comments.patterns![1].patterns![1]",

token: MetaTokenType.TypehintType, /*comment.typehint.type.notation.python*/
match: /(?<!\.)\b(bool|bytes|float|int|object|str|List|Dict|Iterable|Sequence|Set|FrozenSet|Callable|Union|Tuple|Any|None)\b/g,
match: /\b(?<!\.)(bool|bytes|float|int|object|str|List|Dict|Iterable|Sequence|Set|FrozenSet|Callable|Union|Tuple|Any|None)\b/g,
},
{
debugName: "comments.patterns![1].patterns![2]",
Expand Down Expand Up @@ -351,7 +351,7 @@ export const oddFunctionCall: TokenPattern = {

// A bit obscured function call where there may have been an arbitrary number of other operations to get the function. E.g. "arr[idx](args)"
token: MetaTokenType.FunctionCall, /*meta.function-call.python*/
begin: /(?<=\]|\)|")\s*(?=\()/g,
begin: /(?<=[\]\)"])\s*(?=\()/g,
end: /(\))/dg,
endCaptures: {
1: { token: CharacterTokenType.CloseParentheses, /*punctuation.definition.arguments.end.python*/ },
Expand Down Expand Up @@ -821,21 +821,13 @@ export const lambda: TokenPattern = {
{
debugName: "lambda.patterns![0]",

match: /((?<=\.)lambda|lambda(?=\s*[\.=]))/dg,
captures: {
1: { token: MetaTokenType.ControlFlowKeyword, /*keyword.control.flow.python*/ },
},
},
{
debugName: "lambda.patterns![1]",

match: /\b(lambda)\s*?(?=[,\n]|$)/dgm,
captures: {
1: { token: KeywordTokenType.Lambda, /*storage.type.function.lambda.python*/ },
},
},
{
debugName: "lambda.patterns![2]",
debugName: "lambda.patterns![1]",

token: MetaTokenType.LambdaFunction, /*meta.lambda-function.python*/
contentToken: MetaTokenType.FunctionDefinition, /*meta.function.lambda.parameters.python*/
Expand All @@ -849,19 +841,19 @@ export const lambda: TokenPattern = {
},
patterns: [
{
debugName: "lambda.patterns![2].patterns![0]",
debugName: "lambda.patterns![1].patterns![0]",

token: OperatorTokenType.PositionalParameter, /*keyword.operator.positional.parameter.python*/
match: /\//g,
},
{
debugName: "lambda.patterns![2].patterns![1]",
debugName: "lambda.patterns![1].patterns![1]",

token: OperatorTokenType.Unpacking, /*keyword.operator.unpacking.parameter.python*/
match: /(\*\*|\*)/g,
},
{
debugName: "lambda.patterns![2].patterns![4]",
debugName: "lambda.patterns![1].patterns![4]",

match: /([a-zA-Z_]\w*)\s*(?:(,)|(?=:|$))/dgm,
captures: {
Expand Down Expand Up @@ -1141,7 +1133,7 @@ export const callWrapperInheritance: TokenPattern = {

// same as a function call, but in inheritance context
token: MetaTokenType.FunctionCall, /*meta.function-call.python*/
begin: /\b(?=([a-zA-Z_]\w*)\s*(\())/g,
begin: /\b(?=[a-zA-Z_]\w*\s*\()/g,
end: /(\))/dg,
endCaptures: {
1: { token: CharacterTokenType.CloseParentheses, /*punctuation.definition.arguments.end.python*/ },
Expand All @@ -1162,7 +1154,7 @@ export const functionCall: TokenPattern = {

// Regular function call of the type "name(args)"
token: MetaTokenType.FunctionCall, /*meta.function-call.python*/
begin: /\b(?=([a-zA-Z_]\w*)\s*(\())/g,
begin: /\b(?=[a-zA-Z_]\w*\s*\()/g,
end: /(\))/dg,
endCaptures: {
1: { token: CharacterTokenType.CloseParentheses, /*punctuation.definition.arguments.end.python*/ },
Expand All @@ -1178,7 +1170,7 @@ export const functionName: TokenPattern = {

// Some color schemas support meta.function-call.generic scope
token: MetaTokenType.FunctionCall, /*entity.name.function.call.python meta.function-call.generic.python*/
match: /\b([a-zA-Z_]\w*)\b/g,
match: /\b[a-zA-Z_]\w*\b/g,
},
]
};
Expand Down Expand Up @@ -1250,7 +1242,7 @@ export const builtinExceptions: TokenPattern = {
debugName: "builtinExceptions",

token: MetaTokenType.BuiltinExceptionType, /*support.type.exception.python*/
match: /(?<!\.)\b((Arithmetic|Assertion|Attribute|Buffer|BlockingIO|BrokenPipe|ChildProcess|(Connection(Aborted|Refused|Reset)?)|EOF|Environment|FileExists|FileNotFound|FloatingPoint|IO|Import|Indentation|Index|Interrupted|IsADirectory|NotADirectory|Permission|ProcessLookup|Timeout|Key|Lookup|Memory|Name|NotImplemented|OS|Overflow|Reference|Runtime|Recursion|Syntax|System|Tab|Type|UnboundLocal|Unicode(Encode|Decode|Translate)?|Value|Windows|ZeroDivision|ModuleNotFound)Error|((Pending)?Deprecation|Runtime|Syntax|User|Future|Import|Unicode|Bytes|Resource)?Warning|SystemExit|Stop(Async)?Iteration|KeyboardInterrupt|GeneratorExit|(Base)?Exception)\b/g,
match: /\b(?<!\.)((Arithmetic|Assertion|Attribute|Buffer|BlockingIO|BrokenPipe|ChildProcess|(Connection(Aborted|Refused|Reset)?)|EOF|Environment|FileExists|FileNotFound|FloatingPoint|IO|Import|Indentation|Index|Interrupted|IsADirectory|NotADirectory|Permission|ProcessLookup|Timeout|Key|Lookup|Memory|Name|NotImplemented|OS|Overflow|Reference|Runtime|Recursion|Syntax|System|Tab|Type|UnboundLocal|Unicode(Encode|Decode|Translate)?|Value|Windows|ZeroDivision|ModuleNotFound)Error|((Pending)?Deprecation|Runtime|Syntax|User|Future|Import|Unicode|Bytes|Resource)?Warning|SystemExit|Stop(Async)?Iteration|KeyboardInterrupt|GeneratorExit|(Base)?Exception)\b/g,
};

export const builtinFunctions: TokenPattern = {
Expand All @@ -1259,13 +1251,13 @@ export const builtinFunctions: TokenPattern = {
debugName: "builtinFunctions.patterns![0]",

token: EntityTokenType.FunctionName, /*support.function.builtin.python*/
match: /(?<!\.)\b(__import__|abs|aiter|all|any|anext|ascii|bin|breakpoint|callable|chr|compile|copyright|credits|delattr|dir|divmod|enumerate|eval|exec|exit|filter|format|getattr|globals|hasattr|hash|help|hex|id|input|isinstance|issubclass|iter|len|license|locals|map|max|memoryview|min|next|oct|open|ord|pow|print|quit|range|reload|repr|reversed|round|setattr|sorted|sum|vars|zip)\b/g,
match: /\b(?<!\.)(__import__|abs|aiter|all|any|anext|ascii|bin|breakpoint|callable|chr|compile|copyright|credits|delattr|dir|divmod|enumerate|eval|exec|exit|filter|format|getattr|globals|hasattr|hash|help|hex|id|input|isinstance|issubclass|iter|len|license|locals|map|max|memoryview|min|next|oct|open|ord|pow|print|quit|range|reload|repr|reversed|round|setattr|sorted|sum|vars|zip)\b/g,
},
{
debugName: "builtinFunctions.patterns![1]",

token: EntityTokenType.Identifier, /*variable.legacy.builtin.python*/
match: /(?<!\.)\b(file|reduce|intern|raw_input|unicode|cmp|basestring|execfile|long|xrange)\b/g,
match: /\b(?<!\.)(file|reduce|intern|raw_input|unicode|cmp|basestring|execfile|long|xrange)\b/g,
},
]
};
Expand All @@ -1274,7 +1266,7 @@ export const builtinTypes: TokenPattern = {
debugName: "builtinTypes",

token: MetaTokenType.BuiltinType, /*support.type.python*/
match: /(?<!\.)\b(bool|bytearray|bytes|classmethod|complex|dict|float|frozenset|int|list|object|property|set|slice|staticmethod|str|tuple|type|super)\b/g,
match: /\b(?<!\.)(bool|bytearray|bytes|classmethod|complex|dict|float|frozenset|int|list|object|property|set|slice|staticmethod|str|tuple|type|super)\b/g,
};

export const magicFunctionNames: TokenPattern = {
Expand Down Expand Up @@ -2512,10 +2504,10 @@ export const stringQuotedSingleLine: TokenPattern = {
debugName: "stringQuotedSingleLine",

token: LiteralTokenType.String, /*string.quoted.single.python*/
begin: /(?:\b([rR])(?=[uU]))?([uU])?(['"])/dg,
begin: /(\b[rRuU])?(\b\w+)?(['"])/dg,
beginCaptures: {
1: { token: MetaTokenType.Invalid, /*invalid.illegal.prefix.python*/ },
2: { token: MetaTokenType.StringStorageType, /*storage.type.string.python*/ },
1: { token: MetaTokenType.StringStorageType, /*storage.type.string.python*/ },
2: { token: MetaTokenType.Invalid, /*invalid.illegal.prefix.python*/ },
3: { token: MetaTokenType.StringBegin, /*punctuation.definition.string.begin.python*/ },
},
// @ts-ignore: Back references in end patterns are replaced by begin matches at runtime
Expand Down Expand Up @@ -3112,12 +3104,12 @@ classInheritance.patterns!.splice(5, 0, classKwarg);
classInheritance.patterns!.splice(6, 0, callWrapperInheritance);
classInheritance.patterns!.push(memberAccessClass, inheritanceIdentifier);
memberAccessClass.patterns!.splice(0, 0, callWrapperInheritance);
lambda.patterns![2].patterns!.splice(2, 0, lambdaNestedIncomplete);
lambda.patterns![2].patterns!.splice(3, 0, illegalNames);
lambda.patterns![2].patterns!.splice(6, 0, backticks);
lambda.patterns![2].patterns!.splice(7, 0, illegalAnno);
lambda.patterns![2].patterns!.splice(8, 0, lambdaParameterWithDefault);
lambda.patterns![2].patterns!.push(illegalOperator);
lambda.patterns![1].patterns!.splice(2, 0, lambdaNestedIncomplete);
lambda.patterns![1].patterns!.splice(3, 0, illegalNames);
lambda.patterns![1].patterns!.splice(6, 0, backticks);
lambda.patterns![1].patterns!.splice(7, 0, illegalAnno);
lambda.patterns![1].patterns!.splice(8, 0, lambdaParameterWithDefault);
lambda.patterns![1].patterns!.push(illegalOperator);
functionDeclaration.patterns!.splice(0, 0, functionDefName);
functionDeclaration.patterns!.splice(1, 0, parameters);
functionDeclaration.patterns!.push(returnAnnotation);
Expand Down
10 changes: 5 additions & 5 deletions src/tokenizer/renpy-token-patterns.g.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
// THIS FILE HAS BEEN GENERATED BY THE `syntax-to-token-pattern.py` GENERATOR
// DO NOT EDIT THIS FILE DIRECTLY! INSTEAD RUN THE PYTHON SCRIPT.
// ANY MANUAL EDITS MADE TO THIS FILE WILL BE OVERWRITTEN. YOU HAVE BEEN WARNED.
// Last generated: 14/08/2024 09:13:39 (UTC+0)
// Last generated: 14/08/2024 16:34:44 (UTC+0)

import { MetaTokenType, CharacterTokenType, LiteralTokenType, EntityTokenType, KeywordTokenType, EscapedCharacterTokenType, OperatorTokenType } from "./renpy-tokens";
import { TokenPattern } from "./token-pattern-types";
Expand Down Expand Up @@ -645,7 +645,7 @@ export const stringTags: TokenPattern = {

// Valid tags with numeric params (self-closing)
token: MetaTokenType.StringTag, /*meta.string.tag.${2:/downcase}.self-closing.renpy*/
match: /({)\s*(p|w)(=)(\+?)(\d*(?:.\d+)?)\s*(})/dg,
match: /({)\s*(p|w)(=)(\+)?(\d*(?:.\d+)?)\s*(})/dg,
captures: {
1: { token: CharacterTokenType.OpenBracket, /*constant.character.format.placeholder.other.renpy punctuation.definition.tag.begin.renpy*/ },
2: { token: EntityTokenType.TagName, /*entity.name.tag.${2:/downcase}.renpy*/ },
Expand All @@ -660,7 +660,7 @@ export const stringTags: TokenPattern = {

// Valid tags with numeric params (self-closing)
token: MetaTokenType.StringTag, /*meta.string.tag.${2:/downcase}.self-closing.renpy*/
match: /({)\s*(v?space)(=)(\+?)(\d+)\s*(})/dg,
match: /({)\s*(v?space)(=)(\+)?(\d+)\s*(})/dg,
captures: {
1: { token: CharacterTokenType.OpenBracket, /*constant.character.format.placeholder.other.renpy punctuation.definition.tag.begin.renpy*/ },
2: { token: EntityTokenType.TagName, /*entity.name.tag.${2:/downcase}.renpy*/ },
Expand Down Expand Up @@ -807,7 +807,7 @@ export const stringTags: TokenPattern = {
debugName: "stringTags.patterns![11]",

// Close tags
match: /({\/)\s*([a-zA-Z_]?\w*)(=?)(.*?)\s*(})/dg,
match: /({\/)\s*([a-zA-Z_]\w*)?(=)?(.*?)?\s*(})/dg,
captures: {
0: { token: MetaTokenType.StringTag, /*meta.string.tag.${2:/downcase}.end.renpy*/ },
1: { token: CharacterTokenType.OpenBracket, /*constant.character.format.placeholder.other.renpy punctuation.definition.tag.begin.renpy*/ },
Expand All @@ -821,7 +821,7 @@ export const stringTags: TokenPattern = {
debugName: "stringTags.patterns![12]",

// Custom tags (optional close)
match: /({)\s*([a-zA-Z_]?\w*)(=?)(.*?)\s*(})/dg,
match: /({)\s*([a-zA-Z_]\w*)?(=)?(.*?)?\s*(})/dg,
captures: {
0: { token: MetaTokenType.StringTag, /*meta.string.tag.${2:/downcase}.start.renpy*/ },
1: { token: CharacterTokenType.OpenBracket, /*constant.character.format.placeholder.other.renpy punctuation.definition.tag.begin.renpy*/ },
Expand Down
10 changes: 10 additions & 0 deletions src/tokenizer/token-definitions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,12 @@ export class TreeNode {
return this.token === null && !this.hasChildren();
}

/**
 * Resets this node to a blank state.
 *
 * Sets the node's token and parent reference to `null` and empties its
 * child collection. Nothing in this method touches the former parent's own
 * child collection, so detaching the node from that side is left to the caller.
 */
public clear(): void {
    this.token = null;
    this.parent = null;
    this.children.clear();
}

// Recursively iterate over all children
public forEach(callback: (node: TreeNode) => void): void {
this.children.forEach((child) => {
Expand Down Expand Up @@ -288,6 +294,10 @@ export class TokenTree {
this.root = new TreeNode();
}

/**
 * Empties the tree by clearing its root node in place.
 *
 * The existing root instance is kept; only its contents are reset.
 */
public clear(): void {
    this.root.clear();
}

/**
 * Reports whether the tree has no nodes below its root.
 *
 * @returns `true` when the root node has no children, otherwise `false`.
 */
public isEmpty(): boolean {
    const rootHasChildren = this.root.hasChildren();
    return !rootHasChildren;
}
Expand Down
Loading
Loading