diff --git a/.github/workflows/build-and-package.yaml b/.github/workflows/build-and-package.yaml
index 37add713..2ae6f379 100644
--- a/.github/workflows/build-and-package.yaml
+++ b/.github/workflows/build-and-package.yaml
@@ -155,7 +155,7 @@ jobs:
         uses: ruby/setup-ruby@v1
         with:
           ruby-version: '2.6'
-      - name: Downoad man page
+      - name: Download man page
         uses: actions/download-artifact@v2
         with:
           name: q-man-page
diff --git a/QSQL-NOTES.md b/QSQL-NOTES.md
index 5f61f83a..effd0538 100644
--- a/QSQL-NOTES.md
+++ b/QSQL-NOTES.md
@@ -52,9 +52,9 @@ $ q "select count(*) from mydatabase1.sqlite:::mytable1 a left join mydatabase2.
 
 Running queries on sqlite databases does not usually entail loading the data into memory. Databases are attached to a virtual database and queried directly from disk. This means that querying speed is practically identical to standard sqlite access. This is also true when multiple sqlite databases are used in a single query. The same mechanism is being used by q whenever it uses a qsql file (either directly or as a cache of a delimited fild).
 
-sqlite itself does have a pre-compiled limit of the number of databases that can be attached simultanously. If this limit is reached, then q will attach as many databases as possible, and then continue processing by loading additional tables into memory in order to execute the query. The standard limit in sqlite3 (unless compiled specifically with another limit) is 10 databases. This allows q to access as many as 8 user databases without having to load any data into memory (2 databases are always used for q's internal logic). Using more databases in a single query than this pre-compiled sqlite limit would slow things down, since some of the data would go into memory, but the query should still provide correct results.
+sqlite itself does have a pre-compiled limit of the number of databases that can be attached simultaneously. If this limit is reached, then q will attach as many databases as possible, and then continue processing by loading additional tables into memory in order to execute the query. The standard limit in sqlite3 (unless compiled specifically with another limit) is 10 databases. This allows q to access as many as 8 user databases without having to load any data into memory (2 databases are always used for q's internal logic). Using more databases in a single query than this pre-compiled sqlite limit would slow things down, since some of the data would go into memory, but the query should still provide correct results.
 
-Whenever the sqlite database file contains only one table, the table name part can be ommitted, and the user can specify only the sqlite-filename as the table name. For example, querying an sqlite database `mydatabase.sqlite` that only has one table `mytable` is possible with `q "SELECT ... FROM mydatabase.sqlite"`. There's no need to specify the table name in this case.
+Whenever the sqlite database file contains only one table, the table name part can be omitted, and the user can specify only the sqlite-filename as the table name. For example, querying an sqlite database `mydatabase.sqlite` that only has one table `mytable` is possible with `q "SELECT ... FROM mydatabase.sqlite"`. There's no need to specify the table name in this case.
 
 Since `.qsql` files are also standard sqlite files, they can be queried directly as well. This allows the user to actually delete the original CSV file and use the caches as if they were the original files. For example:
 
diff --git a/bin/q.py b/bin/q.py
index 2a2f6771..e2570754 100755
--- a/bin/q.py
+++ b/bin/q.py
@@ -890,7 +890,7 @@ def __init__(self, sql, data_streams):
                 qtable_name = self.sql_parts[idx + 1]
                 # Otherwise, the next part contains the qtable name. In most cases the next part will be only the qtable name.
                 # We handle one special case here, where this is a subquery as a column: "SELECT (SELECT ... FROM qtable),100 FROM ...".
-                # In that case, there will be an ending paranthesis as part of the name, and we want to handle this case gracefully.
+                # In that case, there will be an ending parenthesis as part of the name, and we want to handle this case gracefully.
                 # This is obviously a hack of a hack :) Just until we have
                 # complete parsing capabilities
                 if ')' in qtable_name:
diff --git a/doc/RATIONALE.markdown b/doc/RATIONALE.markdown
index d9629f3b..f7986662 100644
--- a/doc/RATIONALE.markdown
+++ b/doc/RATIONALE.markdown
@@ -1,7 +1,7 @@
 # q - Treating Text as a Database
 
 ## Why aren't other Linux tools enough?
-The standard Linux tools are amazing and I use them all the time, but the whole idea of Linux is mixing-and-matching the best tools for each part of job. This tool adds the declarative power of SQL to the Linux toolset, without loosing any of the other tools' benefits. In fact, I often use q together with other Linux tools, the same way I pipe awk/sed and grep together all the time.
+The standard Linux tools are amazing and I use them all the time, but the whole idea of Linux is mixing-and-matching the best tools for each part of job. This tool adds the declarative power of SQL to the Linux toolset, without losing any of the other tools' benefits. In fact, I often use q together with other Linux tools, the same way I pipe awk/sed and grep together all the time.
 
 One additional thing to note is that many Linux tools treat text as text and not as data. In that sense, you can look at q as a meta-tool which provides access to all the data-related tools that SQL provides (e.g. expressions, ordering, grouping, aggregation etc.).
 
diff --git a/mkdocs/docs/index.md b/mkdocs/docs/index.md
index 3d5601ce..b2059251 100644
--- a/mkdocs/docs/index.md
+++ b/mkdocs/docs/index.md
@@ -130,7 +130,7 @@ $ time q "select sum(c1),count(*) from myfile.csv" -C readwrite
 500000500000 1000000
 total_time=4.057 seconds
 
-# Now run with `-C read`. The query will run from the cache file and not the original. As the file gets bigger, the difference will be much more noticable
+# Now run with `-C read`. The query will run from the cache file and not the original. As the file gets bigger, the difference will be much more noticeable
 $ time q "select sum(c1),count(*) from myfile.csv" -C read
 500000500000 1000000
 total_time=0.229 seconds
@@ -289,7 +289,7 @@ The following filename types are supported:
 * **sqlite3 database filenames**
     * **With Multiple Tables** - Add an additional `:::` for accessing a specific table. For example `mydatabase.sqlite3:::users_table`.
     * **With One Table Only** - Just specify the database filename, no need for a table name postfix. For example `my_single_table_database.sqlite`.
-* **`.qsql` cache files** - q can auto-generate cache files for delimited files, and they can be queried directly as a table, since they contain only one table, as they are essentially standard sqlite datbases
+* **`.qsql` cache files** - q can auto-generate cache files for delimited files, and they can be queried directly as a table, since they contain only one table, as they are essentially standard sqlite databases
 
 Use `-H` to signify that the input contains a header line. Column names will be detected automatically in that case, and can be used in the query. If this option is not provided, columns will be named cX, starting with 1 (e.g. `q "SELECT c3,c8 from ..."`).
 
@@ -610,7 +610,7 @@ SQL is a declarative language for data, and as such it allows me to define what
 
 The goal of this tool is to provide a bridge between the world of text files and of SQL.
 
 ### Why aren't other Linux tools enough?
-The standard Linux tools are amazing and I use them all the time, but the whole idea of Linux is mixing-and-matching the best tools for each part of job. This tool adds the declarative power of SQL to the Linux toolset, without loosing any of the other tools' benefits. In fact, I often use q together with other Linux tools, the same way I pipe awk/sed and grep together all the time.
+The standard Linux tools are amazing and I use them all the time, but the whole idea of Linux is mixing-and-matching the best tools for each part of job. This tool adds the declarative power of SQL to the Linux toolset, without losing any of the other tools' benefits. In fact, I often use q together with other Linux tools, the same way I pipe awk/sed and grep together all the time.
 
 One additional thing to note is that many Linux tools treat text as text and not as data. In that sense, you can look at q as a meta-tool which provides access to all the data-related tools that SQL provides (e.g. expressions, ordering, grouping, aggregation etc.).
diff --git a/run-benchmark b/run-benchmark
index f6556508..12f6304d 100755
--- a/run-benchmark
+++ b/run-benchmark
@@ -93,7 +93,7 @@ fi
 echo "==== testing octosql ==="
 if [[ -f $Q_BENCHMARK_RESULTS_FOLDER/octosql.benchmark-results ]]
 then
-  echo "Results files for octosql aready exist. Skipping benchmark for octosql"
+  echo "Results files for octosql already exist. Skipping benchmark for octosql"
 else
   pytest -m benchmark -k test_octosql_matrix -v -s $PYTEST_OPTIONS
   RESULT_FILE="octosql*.benchmark-results"
diff --git a/test/BENCHMARK.md b/test/BENCHMARK.md
index c8ec866d..672c068b 100644
--- a/test/BENCHMARK.md
+++ b/test/BENCHMARK.md
@@ -26,7 +26,7 @@ q "select a.*,b.* from my_file.csv.qsql a left join some-sqlite3-database:::some
 
 NOTE: In the current version, caching is not enabled by default - Use `-C readwrite` to enable reading+writing cache files, or `-C read` to just read any existing cache files. A `~/.qrc` file can be added in order to make these options the default if you want.
 
-The benchmark results below reflect the peformance without the caching, e.g. directly reading the delimited files, parsing them and performing the query.
+The benchmark results below reflect the performance without the caching, e.g. directly reading the delimited files, parsing them and performing the query.
 
 I'll update benchmark results later on to provide cached results as well.
 
diff --git a/test/test_suite.py b/test/test_suite.py
index aaa7476f..6e708b5d 100755
--- a/test/test_suite.py
+++ b/test/test_suite.py
@@ -2189,7 +2189,7 @@ def test_gzipped_file(self):
 
 class DelimiterTests(AbstractQTestCase):
 
-    def test_delimition_mistake_with_header(self):
+    def test_delimitation_mistake_with_header(self):
         tmpfile = self.create_file_with_data(sample_data_no_header)
 
         cmd = Q_EXECUTABLE + ' -d " " "select * from %s" -H' % tmpfile.name
@@ -2204,7 +2204,7 @@ def test_delimition_mistake_with_header(self):
 
         self.cleanup(tmpfile)
 
-    def test_tab_delimition_parameter(self):
+    def test_tab_delimitation_parameter(self):
         tmpfile = self.create_file_with_data(
             sample_data_no_header.replace(six.b(","), six.b("\t")))
         cmd = Q_EXECUTABLE + ' -t "select c1,c2,c3 from %s"' % tmpfile.name
@@ -2219,7 +2219,7 @@ def test_tab_delimition_parameter(self):
 
         self.cleanup(tmpfile)
 
-    def test_pipe_delimition_parameter(self):
+    def test_pipe_delimitation_parameter(self):
         tmpfile = self.create_file_with_data(
             sample_data_no_header.replace(six.b(","), six.b("|")))
         cmd = Q_EXECUTABLE + ' -p "select c1,c2,c3 from %s"' % tmpfile.name
@@ -2234,7 +2234,7 @@ def test_pipe_delimition_parameter(self):
 
         self.cleanup(tmpfile)
 
-    def test_tab_delimition_parameter__with_manual_override_attempt(self):
+    def test_tab_delimitation_parameter__with_manual_override_attempt(self):
         tmpfile = self.create_file_with_data(
             sample_data_no_header.replace(six.b(","), six.b("\t")))
         cmd = Q_EXECUTABLE + ' -t -d , "select c1,c2,c3 from %s"' % tmpfile.name
@@ -2250,7 +2250,7 @@ def test_tab_delimition_parameter__with_manual_override_attempt(self):
 
         self.cleanup(tmpfile)
 
-    def test_pipe_delimition_parameter__with_manual_override_attempt(self):
+    def test_pipe_delimitation_parameter__with_manual_override_attempt(self):
         tmpfile = self.create_file_with_data(
             sample_data_no_header.replace(six.b(","), six.b("|")))
         cmd = Q_EXECUTABLE + ' -p -d , "select c1,c2,c3 from %s"' % tmpfile.name
@@ -2887,7 +2887,7 @@ def test_multiline_escaped_double_quoted_values_in_quoted_data(self):
         self.cleanup(tmp_data_file)
 
     def test_disable_double_double_quoted_data_flag__values(self):
-        # This test (and flag) is meant to verify backward comptibility only. It is possible that
+        # This test (and flag) is meant to verify backward compatibility only. It is possible that
         # this flag will be removed completely in the future
 
         tmp_data_file = self.create_file_with_data(double_double_quoted_data)
@@ -2925,7 +2925,7 @@ def test_disable_double_double_quoted_data_flag__values(self):
         self.cleanup(tmp_data_file)
 
     def test_disable_escaped_double_quoted_data_flag__values(self):
-        # This test (and flag) is meant to verify backward comptibility only. It is possible that
+        # This test (and flag) is meant to verify backward compatibility only. It is possible that
         # this flag will be removed completely in the future
 
         tmp_data_file = self.create_file_with_data(escaped_double_quoted_data)
@@ -2963,7 +2963,7 @@ def test_disable_escaped_double_quoted_data_flag__values(self):
         self.cleanup(tmp_data_file)
 
     def test_combined_quoted_data_flags__number_of_columns_detected(self):
-        # This test (and flags) is meant to verify backward comptibility only. It is possible that
+        # This test (and flags) is meant to verify backward compatibility only. It is possible that
         # these flags will be removed completely in the future
 
         tmp_data_file = self.create_file_with_data(combined_quoted_data)
@@ -4827,7 +4827,7 @@ def test_column_formatting_with_output_header(self):
         self.assertEqual(o[0], six.b('mysum myavg'))
         self.assertEqual(o[1], six.b('55.000 5.500'))
 
-    def py3_test_successfuly_parse_universal_newlines_without_explicit_flag(self):
+    def py3_test_successfully_parse_universal_newlines_without_explicit_flag(self):
         def list_as_byte_list(l):
             return list(map(lambda x:six.b(x),l))
 
@@ -4854,7 +4854,7 @@ def list_as_byte_list(l):
 
         self.cleanup(tmp_data_file)
 
-    test_parsing_universal_newlines_without_explicit_flag = py3_test_successfuly_parse_universal_newlines_without_explicit_flag
+    test_parsing_universal_newlines_without_explicit_flag = py3_test_successfully_parse_universal_newlines_without_explicit_flag
 
     def test_universal_newlines_parsing_flag(self):
         def list_as_byte_list(l):
@@ -5609,7 +5609,7 @@ def _prepare_test_file(self, lines, columns):
         self.assertEqual(r, 0)
         # Create file cache as part of preparation
         r, o, e = run_command(Q_EXECUTABLE + ' -C readwrite -d , "select count(*) from %s"' % filename)
-        self.asserEqual(r, 0)
+        self.assertEqual(r, 0)
         return filename
 
     def _decide_result(self,attempt_results):