From 91dfbe592e85fd37e368622f8d611e31ee14612d Mon Sep 17 00:00:00 2001 From: David Raznick Date: Tue, 18 Jun 2024 13:27:11 +0100 Subject: [PATCH] add truncate --- Cargo.lock | 14 +++++++------- Cargo.toml | 6 +++--- docs/changelog.md | 8 ++++++++ docs/options.md | 22 ++++++++++++++++++++++ flatterer/__init__.py | 9 +++++++-- src/lib.rs | 8 ++++++-- 6 files changed, 53 insertions(+), 14 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8b8f7bd..2de4af0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1305,9 +1305,9 @@ dependencies = [ [[package]] name = "csvs_convert" -version = "0.8.10" +version = "0.8.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a0d6580b34f2d1b9da04bcff7c73f9a0e0d78385ed59955563122fdfdeb5c406" +checksum = "3393a3573887f267781d0b0ffb3d117d549bd1682bbfe236fa802801dbd6c248" dependencies = [ "chrono", "counter", @@ -1665,7 +1665,7 @@ dependencies = [ [[package]] name = "flatterer" -version = "0.19.15" +version = "0.19.17" dependencies = [ "clap", "crossbeam-channel", @@ -1684,9 +1684,9 @@ dependencies = [ [[package]] name = "flatterer-web" -version = "0.19.14" +version = "0.19.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "54f937c74b39af498972eb3dc27ba9f6780107b7104f203eb5888109e2c616d5" +checksum = "013ddead49c6def498bd6e4e293c6ed69cd91a8cd2ac7f8755121779272ecf5d" dependencies = [ "async-std", "csv", @@ -2490,9 +2490,9 @@ dependencies = [ [[package]] name = "libflatterer" -version = "0.19.14" +version = "0.19.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f67e98b7472987d4b4cbda4a97c3c1b0323bb5666928efe34b20e5cf60dc6c72" +checksum = "98b4765acf315633f7743bd50772ac060b192e2d11fd40ae64743473518169ed" dependencies = [ "arrow-array 51.0.0", "arrow-schema 51.0.0", diff --git a/Cargo.toml b/Cargo.toml index 1516578..6e25bf1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "flatterer" -version = "0.19.16" +version = "0.19.17" 
authors = ["David Raznick "] edition = "2021" license = "MIT" @@ -14,9 +14,9 @@ serde_json = { version = "1.0.83", features = ["preserve_order"] } pyo3 = { version = "0.18.3", features = ["extension-module", "eyre"] } eyre = "0.6.8" #libflatterer={path = "../libflatterer"} -libflatterer = "0.19.14" +libflatterer = "0.19.16" -flatterer-web = "0.19.14" +flatterer-web = "0.19.16" #flatterer-web={path = "../flatterer-web"} env_logger = "0.10.1" diff --git a/docs/changelog.md b/docs/changelog.md index c1e8bef..0b6b489 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -4,6 +4,14 @@ All notable changes to this project will be documented in this file. and this project adheres to [Semantic Versioning](http://semver.org/). +## [0.19.17] - 2024-06-18 + +### New +- truncate postgres + +### Fixed +- timezone date types now accepted in postgres + ## [0.19.15] - 2024-05-09 ### Fixed diff --git a/docs/options.md b/docs/options.md index 502011a..267e463 100644 --- a/docs/options.md +++ b/docs/options.md @@ -60,6 +60,8 @@ Options: tables to fit data --drop When loading to postgres or sqlite, drop table if already exists. + --truncate When loading to postgres or sqlite, truncate table + if already exists. --id-prefix TEXT Prefix for all `_link` id fields --stats Produce stats about the data in the datapackage.json file @@ -475,6 +477,26 @@ import flatterer flatterer.flatten('inputfile.json', 'ouput_dir', postgres='postgres://user:pass@host/dbname', drop=True) ``` +## Truncate Tables + +**Warning: this could mean you lose data** + +For postgres and sqlite. Truncate the existing table if it exists. This is useful if you want to load the data into a database with the schema pre-defined. 
+ +### CLI Usage + +```bash +flatterer --postgres='postgres://user:pass@host/dbname' --sqlite-path=sqlite.db INPUT_FILE OUTPUT_DIRECTORY --truncate +``` + +### Python Usage + +```python +import flatterer + +flatterer.flatten('inputfile.json', 'ouput_dir', postgres='postgres://user:pass@host/dbname', truncate=True) +``` + ## Fields File Path to fields CSV file. The fields file can be used for: diff --git a/flatterer/__init__.py b/flatterer/__init__.py index d77456d..453056d 100644 --- a/flatterer/__init__.py +++ b/flatterer/__init__.py @@ -97,6 +97,7 @@ def flatten( gzip_input=False, json_path="", arrays_new_table=False, + truncate=False, ): global LOGGING_SETUP if not LOGGING_SETUP: @@ -144,7 +145,8 @@ def flatten( table_prefix, id_prefix, emit_obj, force, schema, schema_titles, path, json_stream, ndjson, sqlite_path, threads, log_error, postgres, postgres_schema, - drop, pushdown, sql_scripts, evolve, no_link, stats, low_disk, low_memory, gzip_input, json_path, arrays_new_table) + drop, pushdown, sql_scripts, evolve, no_link, stats, low_disk, low_memory, + gzip_input, json_path, arrays_new_table, truncate) elif method == 'iter': if path: raise AttributeError("path not allowed when supplying an iterator") @@ -157,7 +159,7 @@ def flatten( table_prefix, id_prefix, emit_obj, force, schema, schema_titles, sqlite_path, threads, log_error, postgres, postgres_schema, drop, pushdown, sql_scripts, evolve, - no_link, stats, low_disk, low_memory, gzip_input, json_path, arrays_new_table) + no_link, stats, low_disk, low_memory, gzip_input, json_path, arrays_new_table, truncate) else: raise AttributeError("input needs to be a string or a generator of strings, dicts or bytes") @@ -241,6 +243,7 @@ def iterator_flatten(*args, **kw): @click.option('--postgres-schema', default="", help='When loading to postgres, put all tables into this schema.') @click.option('--evolve', is_flag=True, default=False, help='When loading to postgres or sqlite, evolve tables to fit data') 
@click.option('--drop', is_flag=True, default=False, help='When loading to postgres or sqlite, drop table if already exists.') +@click.option('--truncate', is_flag=True, default=False, help='When loading to postgres or sqlite, truncate the table if it alraedy exists.') @click.option('--id-prefix', default="", help='Prefix for all `_link` id fields') @click.option('--stats', is_flag=True, default=False, help='Produce stats about the data in the datapackage.json file') @click.argument('inputs', required=False, nargs=-1) @@ -280,6 +283,7 @@ def cli( stats=False, json_path="", arrays_new_table=False, + truncate=False ): if web: import pathlib @@ -347,6 +351,7 @@ def cli( stats=stats, json_path=json_path, arrays_new_table=arrays_new_table, + truncate=truncate, ) except IOError: pass diff --git a/src/lib.rs b/src/lib.rs index ae38426..4db71a1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -77,7 +77,8 @@ fn flatterer(_py: Python, m: &PyModule) -> PyResult<()> { low_memory:bool, gzip_input:bool, json_path_selector: String, - arrays_new_table: bool + arrays_new_table: bool, + truncate: bool, ) -> Result<()> { let mut op = Options::default(); @@ -118,6 +119,7 @@ fn flatterer(_py: Python, m: &PyModule) -> PyResult<()> { op.gzip_input = gzip_input; op.json_path_selector = json_path_selector; op.arrays_new_table = arrays_new_table; + op.truncate = truncate; if let Err(err) = flatten_all(input_files, output_dir, op) { @@ -169,7 +171,8 @@ fn flatterer(_py: Python, m: &PyModule) -> PyResult<()> { low_memory:bool, gzip_input:bool, json_path_selector: String, - arrays_new_table: bool + arrays_new_table: bool, + truncate: bool, ) -> Result<()> { let mut options = Options::default(); @@ -206,6 +209,7 @@ fn flatterer(_py: Python, m: &PyModule) -> PyResult<()> { options.gzip_input = gzip_input; options.json_path_selector = json_path_selector; options.arrays_new_table = arrays_new_table; + options.truncate = truncate; let final_output_path = PathBuf::from(output_dir); let parts_path = 
final_output_path.join("parts");