From 91dfbe592e85fd37e368622f8d611e31ee14612d Mon Sep 17 00:00:00 2001
From: David Raznick <kindly@gmail.com>
Date: Tue, 18 Jun 2024 13:27:11 +0100
Subject: [PATCH] add truncate

---
 Cargo.lock            | 14 +++++++-------
 Cargo.toml            |  6 +++---
 docs/changelog.md     |  8 ++++++++
 docs/options.md       | 22 ++++++++++++++++++++++
 flatterer/__init__.py |  9 +++++++--
 src/lib.rs            |  8 ++++++--
 6 files changed, 53 insertions(+), 14 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 8b8f7bd..2de4af0 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1305,9 +1305,9 @@ dependencies = [
 
 [[package]]
 name = "csvs_convert"
-version = "0.8.10"
+version = "0.8.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a0d6580b34f2d1b9da04bcff7c73f9a0e0d78385ed59955563122fdfdeb5c406"
+checksum = "3393a3573887f267781d0b0ffb3d117d549bd1682bbfe236fa802801dbd6c248"
 dependencies = [
  "chrono",
  "counter",
@@ -1665,7 +1665,7 @@ dependencies = [
 
 [[package]]
 name = "flatterer"
-version = "0.19.15"
+version = "0.19.17"
 dependencies = [
  "clap",
  "crossbeam-channel",
@@ -1684,9 +1684,9 @@ dependencies = [
 
 [[package]]
 name = "flatterer-web"
-version = "0.19.14"
+version = "0.19.16"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "54f937c74b39af498972eb3dc27ba9f6780107b7104f203eb5888109e2c616d5"
+checksum = "013ddead49c6def498bd6e4e293c6ed69cd91a8cd2ac7f8755121779272ecf5d"
 dependencies = [
  "async-std",
  "csv",
@@ -2490,9 +2490,9 @@ dependencies = [
 
 [[package]]
 name = "libflatterer"
-version = "0.19.14"
+version = "0.19.16"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f67e98b7472987d4b4cbda4a97c3c1b0323bb5666928efe34b20e5cf60dc6c72"
+checksum = "98b4765acf315633f7743bd50772ac060b192e2d11fd40ae64743473518169ed"
 dependencies = [
  "arrow-array 51.0.0",
  "arrow-schema 51.0.0",
diff --git a/Cargo.toml b/Cargo.toml
index 1516578..6e25bf1 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "flatterer"
-version = "0.19.16"
+version = "0.19.17"
 authors = ["David Raznick <kindly@gmail.com>"]
 edition = "2021"
 license = "MIT"
@@ -14,9 +14,9 @@ serde_json = { version = "1.0.83", features = ["preserve_order"] }
 pyo3 = { version = "0.18.3", features = ["extension-module", "eyre"] }
 eyre = "0.6.8"
 #libflatterer={path = "../libflatterer"}
-libflatterer = "0.19.14"
+libflatterer = "0.19.16"
 
-flatterer-web = "0.19.14"
+flatterer-web = "0.19.16"
 #flatterer-web={path = "../flatterer-web"}
 
 env_logger = "0.10.1"
diff --git a/docs/changelog.md b/docs/changelog.md
index c1e8bef..0b6b489 100644
--- a/docs/changelog.md
+++ b/docs/changelog.md
@@ -4,6 +4,14 @@ All notable changes to this project will be documented in this file.
 
 and this project adheres to [Semantic Versioning](http://semver.org/).
 
+## [0.19.17] - 2024-06-18
+
+### New
+- truncate postgres
+
+### Fixed
+- timezone date types now accepted in postgres
+
 ## [0.19.15] - 2024-05-09
 
 ### Fixed
diff --git a/docs/options.md b/docs/options.md
index 502011a..267e463 100644
--- a/docs/options.md
+++ b/docs/options.md
@@ -60,6 +60,8 @@ Options:
                               tables to fit data
   --drop                      When loading to postgres or sqlite, drop table
                               if already exists.
+  --truncate                  When loading to postgres or sqlite, truncate table
+                              if already exists.
   --id-prefix TEXT            Prefix for all `_link` id fields
   --stats                     Produce stats about the data in the
                               datapackage.json file
@@ -475,6 +477,26 @@ import flatterer
 flatterer.flatten('inputfile.json', 'ouput_dir', postgres='postgres://user:pass@host/dbname', drop=True)
 ```
 
+## Truncate Tables
+
+**Warning: this could mean you lose data**
+
+For postgres and sqlite: truncate the table if it already exists. This is useful if you want to load the data into a database where the schema is pre-defined.
+
+### CLI Usage
+
+```bash 
+flatterer --postgres='postgres://user:pass@host/dbname' --sqlite-path=sqlite.db INPUT_FILE OUTPUT_DIRECTORY --truncate
+```
+
+### Python Usage
+
+```python
+import flatterer
+
+flatterer.flatten('inputfile.json', 'ouput_dir', postgres='postgres://user:pass@host/dbname', truncate=True)
+```
+
 ## Fields File
 
 Path to fields CSV file.  The fields file can be used for:
diff --git a/flatterer/__init__.py b/flatterer/__init__.py
index d77456d..453056d 100644
--- a/flatterer/__init__.py
+++ b/flatterer/__init__.py
@@ -97,6 +97,7 @@ def flatten(
     gzip_input=False,
     json_path="",
     arrays_new_table=False,
+    truncate=False,
 ):
     global LOGGING_SETUP
     if not LOGGING_SETUP:
@@ -144,7 +145,8 @@ def flatten(
                        table_prefix, id_prefix, emit_obj, force,  
                        schema, schema_titles, path, json_stream, ndjson, 
                        sqlite_path, threads, log_error, postgres, postgres_schema, 
-                       drop, pushdown, sql_scripts, evolve, no_link, stats, low_disk, low_memory, gzip_input, json_path, arrays_new_table)
+                       drop, pushdown, sql_scripts, evolve, no_link, stats, low_disk, low_memory, 
+                       gzip_input, json_path, arrays_new_table, truncate)
         elif method == 'iter':
             if path:
                 raise AttributeError("path not allowed when supplying an iterator")
@@ -157,7 +159,7 @@ def flatten(
                        table_prefix, id_prefix, emit_obj, force,  
                        schema, schema_titles, sqlite_path, threads, log_error, 
                        postgres, postgres_schema, drop, pushdown, sql_scripts, evolve, 
-                       no_link, stats, low_disk, low_memory, gzip_input, json_path, arrays_new_table)
+                       no_link, stats, low_disk, low_memory, gzip_input, json_path, arrays_new_table, truncate)
         else:
             raise AttributeError("input needs to be a string or a generator of strings, dicts or bytes")
 
@@ -241,6 +243,7 @@ def iterator_flatten(*args, **kw):
 @click.option('--postgres-schema', default="", help='When loading to postgres, put all tables into this schema.')
 @click.option('--evolve', is_flag=True, default=False, help='When loading to postgres or sqlite, evolve tables to fit data')
 @click.option('--drop', is_flag=True, default=False, help='When loading to postgres or sqlite, drop table if already exists.')
+@click.option('--truncate', is_flag=True, default=False, help='When loading to postgres or sqlite, truncate the table if it alraedy exists.')
 @click.option('--id-prefix', default="", help='Prefix for all `_link` id fields')
 @click.option('--stats', is_flag=True, default=False, help='Produce stats about the data in the datapackage.json file')
 @click.argument('inputs', required=False, nargs=-1)
@@ -280,6 +283,7 @@ def cli(
     stats=False,
     json_path="",
     arrays_new_table=False,
+    truncate=False
 ):
     if web:
         import pathlib
@@ -347,6 +351,7 @@ def cli(
                 stats=stats,
                 json_path=json_path,
                 arrays_new_table=arrays_new_table,
+                truncate=truncate,
                 )
     except IOError:
         pass
diff --git a/src/lib.rs b/src/lib.rs
index ae38426..4db71a1 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -77,7 +77,8 @@ fn flatterer(_py: Python, m: &PyModule) -> PyResult<()> {
         low_memory:bool,
         gzip_input:bool,
         json_path_selector: String,
-        arrays_new_table: bool
+        arrays_new_table: bool,
+        truncate: bool,
     ) -> Result<()> {
 
         let mut op = Options::default();
@@ -118,6 +119,7 @@ fn flatterer(_py: Python, m: &PyModule) -> PyResult<()> {
         op.gzip_input = gzip_input;
         op.json_path_selector = json_path_selector;
         op.arrays_new_table = arrays_new_table;
+        op.truncate = truncate;
 
 
         if let Err(err) = flatten_all(input_files, output_dir, op) {
@@ -169,7 +171,8 @@ fn flatterer(_py: Python, m: &PyModule) -> PyResult<()> {
         low_memory:bool,
         gzip_input:bool,
         json_path_selector: String,
-        arrays_new_table: bool
+        arrays_new_table: bool,
+        truncate: bool,
     ) -> Result<()> {
         let mut options = Options::default();
 
@@ -206,6 +209,7 @@ fn flatterer(_py: Python, m: &PyModule) -> PyResult<()> {
         options.gzip_input = gzip_input;
         options.json_path_selector = json_path_selector;
         options.arrays_new_table = arrays_new_table;
+        options.truncate = truncate;
 
         let final_output_path = PathBuf::from(output_dir);
         let parts_path = final_output_path.join("parts");