-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
26 changed files
with
1,926 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
# oap-tsv | ||
|
||
TSV is a Tab-Separated Values format. Unlike Comma-Separated Values (CSV), it uses ONLY the tab character to distinguish columns. | ||
CSV may be delimited by a comma, tab, semicolon, pipe, etc. | ||
CSV also has rules for enclosing column data that contains special characters (a.k.a. escaping). | ||
TSV always wraps data in quotes if the separator is a comma. | ||
For example, [1..3] becomes '"1","2","3"' (with comma) | ||
and [1..3] becomes '1 2 3' (with tabs) | ||
|
||
Strict rules also allow TSV to be a little bit faster than CSV. | ||
|
||
See https://github.com/eBay/tsv-utils/blob/master/docs/comparing-tsv-and-csv.md |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
<?xml version="1.0" encoding="UTF-8"?> | ||
<!-- | ||
~ The MIT License (MIT) | ||
~ | ||
~ Copyright (c) Open Application Platform Authors | ||
~ | ||
~ Permission is hereby granted, free of charge, to any person obtaining a copy | ||
~ of this software and associated documentation files (the "Software"), to deal | ||
~ in the Software without restriction, including without limitation the rights | ||
~ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
~ copies of the Software, and to permit persons to whom the Software is | ||
~ furnished to do so, subject to the following conditions: | ||
~ | ||
~ The above copyright notice and this permission notice shall be included in all | ||
~ copies or substantial portions of the Software. | ||
~ | ||
~ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
~ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
~ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
~ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
~ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
~ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
~ SOFTWARE. | ||
--> | ||
|
||
<project xmlns="http://maven.apache.org/POM/4.0.0" | ||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" | ||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> | ||
<modelVersion>4.0.0</modelVersion> | ||
<name>oap-tsv-test</name> | ||
<artifactId>oap-tsv-test</artifactId> | ||
|
||
<parent> | ||
<groupId>oap</groupId> | ||
<artifactId>oap-tsv-parent</artifactId> | ||
<version>${oap.project.version}</version> | ||
</parent> | ||
|
||
<dependencies> | ||
<dependency> | ||
<groupId>oap</groupId> | ||
<artifactId>oap-tsv</artifactId> | ||
<version>${project.version}</version> | ||
</dependency> | ||
|
||
<dependency> | ||
<groupId>oap</groupId> | ||
<artifactId>oap-stdlib-test</artifactId> | ||
<version>${oap.project.version}</version> | ||
</dependency> | ||
|
||
<dependency> | ||
<groupId>org.projectlombok</groupId> | ||
<artifactId>lombok</artifactId> | ||
<version>${oap.deps.lombok.version}</version> | ||
<scope>provided</scope> | ||
</dependency> | ||
</dependencies> | ||
</project> |
232 changes: 232 additions & 0 deletions
232
oap-formats/oap-tsv/oap-tsv-test/src/main/java/oap/tsv/test/TsvAssertion.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,232 @@ | ||
/* | ||
* The MIT License (MIT) | ||
* | ||
* Copyright (c) Open Application Platform Authors | ||
* | ||
* Permission is hereby granted, free of charge, to any person obtaining a copy | ||
* of this software and associated documentation files (the "Software"), to deal | ||
* in the Software without restriction, including without limitation the rights | ||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
* copies of the Software, and to permit persons to whom the Software is | ||
* furnished to do so, subject to the following conditions: | ||
* | ||
* The above copyright notice and this permission notice shall be included in all | ||
* copies or substantial portions of the Software. | ||
* | ||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
* SOFTWARE. | ||
*/ | ||
|
||
package oap.tsv.test; | ||
|
||
import lombok.EqualsAndHashCode; | ||
import lombok.ToString; | ||
import oap.io.Files; | ||
import oap.io.content.ContentReader; | ||
import oap.tsv.Tsv; | ||
import oap.tsv.TsvStream.Header; | ||
import oap.util.Lists; | ||
import org.assertj.core.api.AbstractAssert; | ||
|
||
import java.io.File; | ||
import java.io.InputStream; | ||
import java.nio.file.Path; | ||
import java.util.List; | ||
|
||
import static oap.io.content.ContentReader.ofString; | ||
import static org.assertj.core.api.Assertions.assertThat; | ||
|
||
public class TsvAssertion extends AbstractAssert<TsvAssertion, Tsv> { | ||
protected TsvAssertion( String value ) { | ||
this( value, true ); | ||
} | ||
|
||
protected TsvAssertion( String value, boolean withHeaders ) { | ||
this( withHeaders ? ContentReader.read( value, Tsv.tsv.ofSeparatedValues() ).withHeaders().toTsv() | ||
: ContentReader.read( value, Tsv.tsv.ofSeparatedValues() ).toTsv() ); | ||
} | ||
|
||
protected TsvAssertion( Tsv value ) { | ||
super( value, TsvAssertion.class ); | ||
} | ||
|
||
public static TsvAssertion assertTsv( String tsv ) { | ||
return assertTsv( tsv, true ); | ||
} | ||
|
||
public static TsvAssertion assertTsv( String tsv, boolean withHeaders ) { | ||
return new TsvAssertion( tsv, withHeaders ); | ||
} | ||
|
||
public static TsvAssertion assertTsv( Tsv tsv ) { | ||
return new TsvAssertion( tsv ); | ||
} | ||
|
||
public static TsvAssertion assertTsv( Path path ) { | ||
return assertTsv( path, true ); | ||
} | ||
|
||
public static TsvAssertion assertTsv( Path path, boolean withHeaders ) { | ||
return assertTsv( Files.read( path, ofString() ), withHeaders ); | ||
} | ||
|
||
public static TsvAssertion assertTsv( File file ) { | ||
return assertTsv( file, true ); | ||
} | ||
|
||
public static TsvAssertion assertTsv( File file, boolean withHeaders ) { | ||
return assertTsv( Files.read( file.toPath(), ofString() ), withHeaders ); | ||
} | ||
|
||
public static TsvAssertion assertTsv( InputStream is ) { | ||
return assertTsv( is, true ); | ||
} | ||
|
||
public static TsvAssertion assertTsv( InputStream is, boolean withHeaders ) { | ||
return assertTsv( ContentReader.read( is, ofString() ), withHeaders ); | ||
} | ||
|
||
public static Row row( String... cols ) { | ||
return new Row( cols ); | ||
} | ||
|
||
public static Header header( String... cols ) { | ||
return new Header( cols ); | ||
} | ||
|
||
public TsvAssertion hasHeaders( String... headers ) { | ||
assertThat( actual.headers ).contains( headers ); | ||
return this; | ||
} | ||
|
||
public TsvAssertion hasHeaders( Iterable<String> headers ) { | ||
assertThat( actual.headers ).containsAll( headers ); | ||
return this; | ||
} | ||
|
||
public TsvAssertion hasHeaders( Header header ) { | ||
assertThat( actual.headers ).containsAll( header.cols ); | ||
return this; | ||
} | ||
|
||
public TsvAssertion containOnlyHeaders( String... headers ) { | ||
assertThat( actual.headers ).containsOnly( headers ); | ||
return this; | ||
} | ||
|
||
@SafeVarargs | ||
public final TsvAssertion containsExactlyInAnyOrderEntriesOf( List<String>... entries ) { | ||
assertThat( actual.data ) | ||
.containsExactlyInAnyOrderElementsOf( List.of( entries ) ); | ||
return this; | ||
} | ||
|
||
public TsvAssertion containsExactlyInAnyOrderEntriesOf( Header header, Row... rows ) { | ||
hasHeaders( header ); | ||
for( var row : rows ) { | ||
assertThat( row.cols ) | ||
.withFailMessage( "entries length doesnt match headers" ) | ||
.hasSize( header.size() ); | ||
} | ||
assertThat( actual.stream() | ||
.select( header ) | ||
.stripHeaders() | ||
.toTsv() | ||
.data ) | ||
.containsExactlyInAnyOrderElementsOf( Lists.map( rows, r -> r.cols ) ); | ||
|
||
return this; | ||
} | ||
|
||
public TsvAssertion containsAnyEntriesOf( Header header, Row... rows ) { | ||
hasHeaders( header.cols ); | ||
for( var row : rows ) { | ||
assertThat( row.cols ) | ||
.withFailMessage( "entries length doesnt match headers" ) | ||
.hasSize( header.size() ); | ||
} | ||
|
||
assertThat( actual.stream() | ||
.select( header ) | ||
.stripHeaders() | ||
.toTsv() | ||
.data ) | ||
.containsAnyElementsOf( Lists.map( rows, r -> r.cols ) ); | ||
return this; | ||
} | ||
|
||
public TsvAssertion containsOnlyOnceEntriesOf( Header header, Row... rows ) { | ||
hasHeaders( header ); | ||
for( var row : rows ) { | ||
assertThat( row.cols ) | ||
.withFailMessage( "entries length doesnt match headers" ) | ||
.hasSize( header.size() ); | ||
} | ||
assertThat( actual.stream() | ||
.select( header ) | ||
.stripHeaders() | ||
.toTsv() | ||
.data ).containsOnlyOnceElementsOf( Lists.map( rows, r -> r.cols ) ); | ||
return this; | ||
} | ||
|
||
public TsvAssertion doesNotContainAnyEntriesOf( Header header, Row... rows ) { | ||
hasHeaders( header ); | ||
for( var row : rows ) { | ||
assertThat( row.cols ) | ||
.withFailMessage( "entries length doesnt match headers" ) | ||
.hasSize( header.size() ); | ||
} | ||
|
||
assertThat( actual.stream() | ||
.select( header ) | ||
.stripHeaders() | ||
.toTsv() | ||
.data ).doesNotContainAnyElementsOf( Lists.map( rows, r -> r.cols ) ); | ||
return this; | ||
} | ||
|
||
public TsvAssertion doesNotContainAnyEntriesOf( Row... rows ) { | ||
assertThat( actual.headers ) | ||
.withFailMessage( "tsv must contain headers" ) | ||
.isNotEmpty(); | ||
for( var row : rows ) { | ||
assertThat( row.cols ) | ||
.withFailMessage( "entries length doesnt match headers" ) | ||
.hasSize( actual.headers.size() ); | ||
} | ||
assertThat( actual.data ).doesNotContainAnyElementsOf( Lists.map( rows, r -> r.cols ) ); | ||
return this; | ||
} | ||
|
||
public TsvAssertion isNotEmpty() { | ||
assertThat( actual.data ).isNotEmpty(); | ||
return this; | ||
} | ||
|
||
public TsvAssertion isEqualToTsv( String tsv ) { | ||
Tsv expected = ContentReader.read( tsv, Tsv.tsv.ofSeparatedValues() ).withHeaders().toTsv(); | ||
hasHeaders( expected.headers ); | ||
assertThat( this.actual.data ).containsExactlyInAnyOrderElementsOf( expected.data ); | ||
return this; | ||
} | ||
|
||
public TsvAssertion isEqualToTsv( Path tsv ) { | ||
return isEqualToTsv( Files.read( tsv, ofString() ) ); | ||
} | ||
|
||
@ToString | ||
@EqualsAndHashCode | ||
public static class Row { | ||
private final List<String> cols; | ||
|
||
public Row( String... cols ) { | ||
this.cols = List.of( cols ); | ||
} | ||
} | ||
} |
43 changes: 43 additions & 0 deletions
43
oap-formats/oap-tsv/oap-tsv-test/src/test/java/oap/tsv/PrinterTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
/* | ||
* The MIT License (MIT) | ||
* | ||
* Copyright (c) Open Application Platform Authors | ||
* | ||
* Permission is hereby granted, free of charge, to any person obtaining a copy | ||
* of this software and associated documentation files (the "Software"), to deal | ||
* in the Software without restriction, including without limitation the rights | ||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
* copies of the Software, and to permit persons to whom the Software is | ||
* furnished to do so, subject to the following conditions: | ||
* | ||
* The above copyright notice and this permission notice shall be included in all | ||
* copies or substantial portions of the Software. | ||
* | ||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
* SOFTWARE. | ||
*/ | ||
|
||
package oap.tsv; | ||
|
||
import org.testng.annotations.Test; | ||
|
||
import java.util.List; | ||
|
||
import static oap.testng.Asserts.assertString; | ||
|
||
public class PrinterTest { | ||
@Test | ||
public void print() { | ||
assertString( Printer.print( List.of( 1, 2, 3 ), Tsv.DELIMITER_TAB ) ) | ||
.isEqualTo( "1\t2\t3\n" ); | ||
assertString( Printer.print( List.of( 1, 2, 3 ), Tsv.DELIMITER_COMMA, true ) ) | ||
.isEqualTo( "\"1\",\"2\",\"3\"\n" ); | ||
assertString( Printer.print( List.of( 1, "\"2\\\"", 3 ), Tsv.DELIMITER_COMMA, true ) ) | ||
.isEqualTo( "\"1\",\"\"\"2\\\\\"\"\",\"3\"\n" ); | ||
} | ||
} |
47 changes: 47 additions & 0 deletions
47
oap-formats/oap-tsv/oap-tsv-test/src/test/java/oap/tsv/TokenizerPerformance.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
/* | ||
* The MIT License (MIT) | ||
* | ||
* Copyright (c) Open Application Platform Authors | ||
* | ||
* Permission is hereby granted, free of charge, to any person obtaining a copy | ||
* of this software and associated documentation files (the "Software"), to deal | ||
* in the Software without restriction, including without limitation the rights | ||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
* copies of the Software, and to permit persons to whom the Software is | ||
* furnished to do so, subject to the following conditions: | ||
* | ||
* The above copyright notice and this permission notice shall be included in all | ||
* copies or substantial portions of the Software. | ||
* | ||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
* SOFTWARE. | ||
*/ | ||
|
||
package oap.tsv; | ||
|
||
import org.apache.commons.lang3.StringUtils; | ||
import org.testng.annotations.Test; | ||
|
||
import static oap.benchmark.Benchmark.benchmark; | ||
import static oap.tsv.Tokenizer.parse; | ||
import static oap.tsv.Tsv.DELIMITER_TAB; | ||
import static org.assertj.core.api.Assertions.assertThat; | ||
|
||
public class TokenizerPerformance { | ||
|
||
@Test | ||
public void perf() { | ||
String tsv = "aaaa\tbbbb\txxxx\tddd\t19/11/2011\t33.3\taaaa\t11\txxx\tvvvv\tS\tS\t444\txxx\t4444\t1234\tN\tN"; | ||
assertThat( parse( tsv, DELIMITER_TAB ) ).hasSize( 18 ); | ||
benchmark( "split", 1000000, () -> StringUtils.splitByWholeSeparatorPreserveAllTokens( tsv, "\t" ) ) | ||
.run(); | ||
benchmark( "tokenizer", 1000000, () -> parse( tsv, DELIMITER_TAB ) ) | ||
.run(); | ||
} | ||
|
||
} |
Oops, something went wrong.