From dd9b489f9755138e5534a951336dafa5ef5955cc Mon Sep 17 00:00:00 2001 From: Niels de Bruin Date: Mon, 9 Dec 2024 15:29:12 +0100 Subject: [PATCH 1/5] Implement some performance improvements on Find recipe --- .../main/java/org/openrewrite/text/Find.java | 37 ++++++++++++++++--- 1 file changed, 32 insertions(+), 5 deletions(-) diff --git a/rewrite-core/src/main/java/org/openrewrite/text/Find.java b/rewrite-core/src/main/java/org/openrewrite/text/Find.java index 90450be6d64..e3352470630 100644 --- a/rewrite-core/src/main/java/org/openrewrite/text/Find.java +++ b/rewrite-core/src/main/java/org/openrewrite/text/Find.java @@ -28,6 +28,7 @@ import java.util.ArrayList; import java.util.Arrays; +import java.util.LinkedList; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -91,6 +92,18 @@ public String getDescription() { @Nullable String filePattern; + private static LinkedList findAllNewLineIndexes(String input) { + LinkedList indexes = new LinkedList<>(); + int index = input.indexOf('\n'); // Find the first occurrence + + while (index != -1) { + indexes.add(index); // Add the index to the list + index = input.indexOf('\n', index + 1); // Find the next occurrence + } + + return indexes; + } + @Override public TreeVisitor getVisitor() { @@ -123,24 +136,38 @@ public Tree visit(@Nullable Tree tree, ExecutionContext ctx) { return sourceFile; } matcher.reset(); + + String sourceFilePath = sourceFile.getSourcePath().toString(); + List snippets = new ArrayList<>(); int previousEnd = 0; + + LinkedList newlineIndexes = findAllNewLineIndexes(rawText); + int lastNewLineIndex = -1; + while (matcher.find()) { int matchStart = matcher.start(); snippets.add(snippet(rawText.substring(previousEnd, matchStart))); snippets.add(SearchResult.found(snippet(rawText.substring(matchStart, matcher.end())))); previousEnd = matcher.end(); - int startLine = Math.max(0, rawText.substring(0, matchStart).lastIndexOf('\n') + 1); + while (!newlineIndexes.isEmpty() 
&& newlineIndexes.peek() < matchStart) { + lastNewLineIndex = newlineIndexes.pop(); + } + int startLine = Math.max(0, lastNewLineIndex + 1); + int endLine = rawText.indexOf('\n', matcher.end()); if (endLine == -1) { endLine = rawText.length(); } textMatches.insertRow(ctx, new TextMatches.Row( - sourceFile.getSourcePath().toString(), - rawText.substring(startLine, matcher.start()) + "~~>" + - rawText.substring(matcher.start(), endLine) + sourceFilePath, + new StringBuilder(endLine - startLine + 3) + .append(rawText, startLine, matcher.start()) + .append("~~>") + .append(rawText, matcher.start(), endLine) + .toString() )); } snippets.add(snippet(rawText.substring(previousEnd))); @@ -160,8 +187,8 @@ public Tree visit(@Nullable Tree tree, ExecutionContext ctx) { return visitor; } - private static PlainText.Snippet snippet(String text) { return new PlainText.Snippet(Tree.randomId(), Markers.EMPTY, text); } + } From 9bd3ac89dde0a56cc0b3bde7549e61f2952349a8 Mon Sep 17 00:00:00 2001 From: Niels de Bruin Date: Tue, 10 Dec 2024 16:32:36 +0100 Subject: [PATCH 2/5] Add extra tests --- .../java/org/openrewrite/text/FindTest.java | 102 ++++++++++++++++++ 1 file changed, 102 insertions(+) diff --git a/rewrite-core/src/test/java/org/openrewrite/text/FindTest.java b/rewrite-core/src/test/java/org/openrewrite/text/FindTest.java index 0bfae8cadbb..61fb80d37f4 100644 --- a/rewrite-core/src/test/java/org/openrewrite/text/FindTest.java +++ b/rewrite-core/src/test/java/org/openrewrite/text/FindTest.java @@ -111,4 +111,106 @@ void caseInsensitive() { ) ); } + + @Test + void regexBasicMultiLine() { + rewriteRun( + spec -> spec.recipe(new Find("[T\\s]", true, true, true, null, null)), + text( + """ + This is\ttext. + This is\ttext. + """, + """ + ~~>This~~> is~~>\ttext.~~> + ~~>This~~> is~~>\ttext. 
+ """ + ) + ); + } + + @Test + void regexWithoutMultilineAndDotall() { + rewriteRun( + spec -> spec.recipe(new Find("^This.*below\\.$", true, true, false, false, null)), + text( + """ + This is text. + This is a line below. + This is a line above. + This is text. + This is a line below. + """ + ) + ); + } + + @Test + void regexWithoutMultilineAndWithDotAll() { + rewriteRun( + spec -> spec.recipe(new Find("^This.*below\\.$", true, true, false, true, null)), + text( + """ + This is text. + This is a line below. + This is a line above. + This is text. + This is a line below. + """, + """ + ~~>This is text. + This is a line below. + This is a line above. + This is text. + This is a line below. + """ + ) + ); + } + + @Test + void regexWithMultilineAndWithoutDotall() { + rewriteRun( + spec -> spec.recipe(new Find("^This.*below\\.$", true, true, true, false, null)), + text( + """ + This is text. + This is a line below. + This is a line above. + This is text. + This is a line below. + """, + """ + This is text. + ~~>This is a line below. + This is a line above. + This is text. + ~~>This is a line below. + """ + ) + ); + } + + @Test + void regexWithBothMultilineAndDotAll() { + rewriteRun( + spec -> spec.recipe(new Find("^This.*below\\.$", true, true, true, true, null)), + text( + """ + This is text. + This is a line below. + This is a line above. + This is text. + This is a line below. + """, + """ + ~~>This is text. + This is a line below. + This is a line above. + This is text. + This is a line below. 
+ """ + ) + ); + } } From 4c375f9f2f365c19b3f38ccea76b8c350ef85c46 Mon Sep 17 00:00:00 2001 From: Niels de Bruin Date: Tue, 10 Dec 2024 18:14:14 +0100 Subject: [PATCH 3/5] Modify last test --- .../java/org/openrewrite/text/FindTest.java | 25 ++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/rewrite-core/src/test/java/org/openrewrite/text/FindTest.java b/rewrite-core/src/test/java/org/openrewrite/text/FindTest.java index 61fb80d37f4..b30110349a7 100644 --- a/rewrite-core/src/test/java/org/openrewrite/text/FindTest.java +++ b/rewrite-core/src/test/java/org/openrewrite/text/FindTest.java @@ -144,6 +144,25 @@ void regexWithoutMultilineAndDotall() { ) ); } + @Test + void regexMatchingWhitespaceWithoutMultilineWithDotall() { + rewriteRun( + spec -> spec.recipe(new Find("One.Two$", true, true, false, true, null)), + //language=csv + text( // the `.` above matches the space character on the same line + """ + Zero + One Two + Three + """, + """ + Zero + ~~>One Two + Three + """ + ) + ); + } @Test void regexWithoutMultilineAndWithDotAll() { @@ -197,15 +216,15 @@ void regexWithBothMultilineAndDotAll() { spec -> spec.recipe(new Find("^This.*below\\.$", true, true, true, true, null)), text( """ - This is text. + The first line. This is a line below. This is a line above. This is text. This is a line below. """, """ - ~~>This is text. - This is a line below. + The first line. + ~~>This is a line below. This is a line above. This is text. This is a line below. 
From 98be6173375b28209c973656e5a305ec7d7d4fde Mon Sep 17 00:00:00 2001 From: Niels de Bruin Date: Wed, 11 Dec 2024 10:42:29 +0100 Subject: [PATCH 4/5] Restore linked list --- rewrite-core/src/main/java/org/openrewrite/text/Find.java | 6 +++++- .../src/test/java/org/openrewrite/text/FindTest.java | 6 +----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/rewrite-core/src/main/java/org/openrewrite/text/Find.java b/rewrite-core/src/main/java/org/openrewrite/text/Find.java index e3352470630..e2544b372ef 100644 --- a/rewrite-core/src/main/java/org/openrewrite/text/Find.java +++ b/rewrite-core/src/main/java/org/openrewrite/text/Find.java @@ -142,10 +142,14 @@ public Tree visit(@Nullable Tree tree, ExecutionContext ctx) { List snippets = new ArrayList<>(); int previousEnd = 0; - LinkedList newlineIndexes = findAllNewLineIndexes(rawText); + LinkedList newlineIndexes = null; int lastNewLineIndex = -1; while (matcher.find()) { + if (newlineIndexes == null) { + newlineIndexes = findAllNewLineIndexes(rawText); + } + int matchStart = matcher.start(); snippets.add(snippet(rawText.substring(previousEnd, matchStart))); snippets.add(SearchResult.found(snippet(rawText.substring(matchStart, matcher.end())))); diff --git a/rewrite-core/src/test/java/org/openrewrite/text/FindTest.java b/rewrite-core/src/test/java/org/openrewrite/text/FindTest.java index b30110349a7..b887986efb1 100644 --- a/rewrite-core/src/test/java/org/openrewrite/text/FindTest.java +++ b/rewrite-core/src/test/java/org/openrewrite/text/FindTest.java @@ -144,6 +144,7 @@ void regexWithoutMultilineAndDotall() { ) ); } + @Test void regexMatchingWhitespaceWithoutMultilineWithDotall() { rewriteRun( @@ -154,11 +155,6 @@ void regexMatchingWhitespaceWithoutMultilineWithDotall() { Zero One Two Three - """, - """ - Zero - ~~>One Two - Three """ ) ); From 0b0de0b89dffcba8515836352a7109e631a0ecb0 Mon Sep 17 00:00:00 2001 From: Niels de Bruin Date: Wed, 11 Dec 2024 11:58:42 +0100 Subject: [PATCH 5/5] More performance 
gains --- .../main/java/org/openrewrite/text/Find.java | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/rewrite-core/src/main/java/org/openrewrite/text/Find.java b/rewrite-core/src/main/java/org/openrewrite/text/Find.java index e2544b372ef..e0503c6d73a 100644 --- a/rewrite-core/src/main/java/org/openrewrite/text/Find.java +++ b/rewrite-core/src/main/java/org/openrewrite/text/Find.java @@ -26,10 +26,7 @@ import org.openrewrite.remote.Remote; import org.openrewrite.table.TextMatches; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.LinkedList; -import java.util.List; +import java.util.*; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -92,10 +89,14 @@ public String getDescription() { @Nullable String filePattern; - private static LinkedList findAllNewLineIndexes(String input) { - LinkedList indexes = new LinkedList<>(); - int index = input.indexOf('\n'); // Find the first occurrence + private static Deque<Integer> findAllNewLineIndexes(String input, int offset) { + ArrayDeque<Integer> indexes = new ArrayDeque<>(); + int index = input.lastIndexOf('\n', offset - 1); // Last newline strictly before the offset (start of the first match's line); the search is inclusive of fromIndex, hence the -1 + if (index != -1) { + indexes.add(index); + } + index = input.indexOf('\n', offset); // First newline at or after the offset while (index != -1) { indexes.add(index); // Add the index to the list index = input.indexOf('\n', index + 1); // Find the next occurrence @@ -142,12 +143,12 @@ public Tree visit(@Nullable Tree tree, ExecutionContext ctx) { List snippets = new ArrayList<>(); int previousEnd = 0; - LinkedList newlineIndexes = null; + Deque<Integer> newlineIndexes = null; int lastNewLineIndex = -1; while (matcher.find()) { if (newlineIndexes == null) { - newlineIndexes = findAllNewLineIndexes(rawText); + newlineIndexes = findAllNewLineIndexes(rawText, matcher.start()); } int matchStart = matcher.start();