Skip to content

Commit

Permalink
[synthesized-io/tdk#5375] Remove workarounds for Oracle database (#109)
Browse files Browse the repository at this point in the history
  • Loading branch information
ViacheslavP authored Dec 13, 2024
1 parent fd2296c commit 509d292
Show file tree
Hide file tree
Showing 12 changed files with 215 additions and 134 deletions.
46 changes: 46 additions & 0 deletions .github/workflows/test_oracle.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# CI workflow for the Oracle example: builds the compose services, starts the
# databases, runs TDK, then scans input_db and output_db with the `check`
# service.
name: test_oracle

on:
  push:
    branches: ['main']
  pull_request:
    # '**' also matches base branches whose names contain '/'; a single '*'
    # does not cross '/' and would skip such pull requests.
    branches: ['**']

# Every `run` step below executes from the Oracle example directory.
defaults:
  run:
    working-directory: ./oracle

jobs:

  masking:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3

      - name: Build docker compose
        run: |
          docker compose pull
          docker compose build
      - name: Run databases
        # NOTE(review): a failure here does not fail the job; confirm that
        # ignoring the exit status of the `databases` service is intended.
        continue-on-error: true
        run: |
          docker compose run databases
      - name: Run TDK
        env:
          # License key is read from the repository secrets.
          SYNTHESIZED_KEY: ${{ secrets.SYNTHESIZED_LICENSE_KEY }}
        run: |
          docker compose run tdk
      # The folded scalars below join their lines into one command line.
      - name: Verify input data
        run: >
          docker compose run check scan -d input_db
          -c /sodacl/configuration.yaml
          /sodacl/checks_for_input_db.yaml
      - name: Verify output data
        run: >
          docker compose run check scan -d output_db
          -c /sodacl/configuration.yaml
          /sodacl/checks_for_masking.yaml
9 changes: 1 addition & 8 deletions oracle/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ export SYNTHESIZED_KEY=kDpeQB...
Spin up Oracle databases:
```shell
docker compose up oracle_source oracle_target -d
docker compose run databases
```

Run TDK:
Expand All @@ -39,10 +39,3 @@ Run the PostgreSQL database:
```shell
docker compose up postgres_masked -d
```


## Known problems

- get rid of `last_update` columns in the configuration file
- truncate schema with recursive FKs
- mask the `film.description` column
109 changes: 0 additions & 109 deletions oracle/config.tdk.yaml
Original file line number Diff line number Diff line change
@@ -1,115 +1,6 @@
default_config:
mode: MASKING

tables:
- table_name_with_schema: "TEST.CITY"
transformations:
- columns: [ "LAST_UPDATE" ]
params:
type: "date_generator"
mean: 2022-03-01T12:00:00Z
std: 7776000000
min: 2022-01-01T12:00:00Z
max: 2022-07-01T12:00:00Z

- table_name_with_schema: "TEST.CUSTOMER"
transformations:
- columns: [ "CREATE_DATE" ]
params:
type: "date_generator"
mean: 2022-03-01T12:00:00Z
std: 7776000000
min: 2022-01-01T12:00:00Z
max: 2022-07-01T12:00:00Z
- columns: [ "LAST_UPDATE" ]
params:
type: "date_generator"
mean: 2022-03-01T12:00:00Z
std: 7776000000
min: 2022-01-01T12:00:00Z
max: 2022-07-01T12:00:00Z

- table_name_with_schema: "TEST.FILM"
transformations:
- columns: [ "LAST_UPDATE" ]
params:
type: "date_generator"
mean: 2022-03-01T12:00:00Z
std: 7776000000
min: 2022-01-01T12:00:00Z
max: 2022-07-01T12:00:00Z

- table_name_with_schema: "TEST.FILM_ACTOR"
transformations:
- columns: [ "LAST_UPDATE" ]
params:
type: "date_generator"
mean: 2022-03-01T12:00:00Z
std: 7776000000
min: 2022-01-01T12:00:00Z
max: 2022-07-01T12:00:00Z

- table_name_with_schema: "TEST.ACTOR"
transformations:
- columns: [ "LAST_UPDATE" ]
params:
type: "date_generator"
mean: 2022-03-01T12:00:00Z
std: 7776000000
min: 2022-01-01T12:00:00Z
max: 2022-07-01T12:00:00Z

- table_name_with_schema: "TEST.FILM_CATEGORY"
transformations:
- columns: [ "LAST_UPDATE" ]
params:
type: "date_generator"
mean: 2022-03-01T12:00:00Z
std: 7776000000
min: 2022-01-01T12:00:00Z
max: 2022-07-01T12:00:00Z

- table_name_with_schema: "TEST.INVENTORY"
transformations:
- columns: [ "LAST_UPDATE" ]
params:
type: "date_generator"
mean: 2022-03-01T12:00:00Z
std: 7776000000
min: 2022-01-01T12:00:00Z
max: 2022-07-01T12:00:00Z

- table_name_with_schema: "TEST.PAYMENT"
transformations:
- columns: [ "LAST_UPDATE" ]
params:
type: "date_generator"
mean: 2022-03-01T12:00:00Z
std: 7776000000
min: 2022-01-01T12:00:00Z
max: 2022-07-01T12:00:00Z

- table_name_with_schema: "TEST.RENTAL"
transformations:
- columns: [ "LAST_UPDATE" ]
params:
type: "date_generator"
mean: 2022-03-01T12:00:00Z
std: 7776000000
min: 2022-01-01T12:00:00Z
max: 2022-07-01T12:00:00Z

- table_name_with_schema: "TEST.ADDRESS"
transformations:
- columns: [ "LAST_UPDATE" ]
params:
type: "date_generator"
mean: 2022-03-01T12:00:00Z
std: 7776000000
min: 2022-01-01T12:00:00Z
max: 2022-07-01T12:00:00Z


cycle_resolution_strategy: DEFER_FOREIGN_KEY
table_truncation_mode: TRUNCATE
schema_creation_mode: CREATE_IF_NOT_EXISTS
Expand Down
3 changes: 3 additions & 0 deletions oracle/create_flag.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#!/bin/bash

# Create the marker file /opt/oracle/flag.
# docker-compose.yaml mounts this script as the last init script (4.sh) of
# input_db, and that service's healthcheck succeeds once the file exists.
touch /opt/oracle/flag
1 change: 1 addition & 0 deletions oracle/create_user.sql
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
-- Create the `test` account with password `test`.
CREATE USER test IDENTIFIED BY test;
-- Grant all privileges to the account.
GRANT ALL PRIVILEGES TO test;
-- Additionally allow the account to query V_$DATABASE.
-- NOTE(review): presumably required by a tool that connects as `test`; confirm.
GRANT SELECT ON V_$DATABASE TO test;
42 changes: 26 additions & 16 deletions oracle/docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ version: '3'

services:

oracle_source:
input_db:
image: gvenzl/oracle-xe
container_name: oracle_source
container_name: input_db
networks:
- simple-network
ports:
Expand All @@ -15,16 +15,17 @@ services:
- ./create_user.sql:/container-entrypoint-initdb.d/1.sql
- ./sakila-schema.sql:/container-entrypoint-initdb.d/2.sql
- ./oracle-sakila-insert-data.sql:/container-entrypoint-initdb.d/3.sql
- ./create_flag.sh:/container-entrypoint-initdb.d/4.sh
healthcheck:
test: "${ORACLE_BASE}/healthcheck.sh"
interval: "3s"
timeout: "3s"
start_period: "3s"
retries: 30
test: ["CMD", "sh", "-c", "if [ -e /opt/oracle/flag ]; then exit 0; else exit 1; fi"]
interval: "6s"
timeout: "6s"
start_period: "6s"
retries: 60

oracle_target:
output_db:
image: gvenzl/oracle-xe
container_name: oracle_target
container_name: output_db
networks:
- simple-network
ports:
Expand All @@ -34,22 +35,22 @@ services:
volumes:
- ./create_user.sql:/container-entrypoint-initdb.d/1.sql
healthcheck:
test: "${ORACLE_BASE}/healthcheck.sh"
test: "/opt/oracle/healthcheck.sh"
interval: "3s"
timeout: "3s"
start_period: "3s"
retries: 30

databases:
container_name: databases
image: docker/whalesay
image: synthesizedio/whalesay
command: >
cowsay
"Both databases have been started and are ready for TDK exercises!"
depends_on:
oracle_source:
input_db:
condition: service_healthy
oracle_target:
output_db:
condition: service_healthy


Expand All @@ -60,10 +61,10 @@ services:
networks:
- simple-network
environment:
SYNTHESIZED_INPUT_URL: jdbc:oracle:thin:@oracle_source:1521:xe
SYNTHESIZED_INPUT_URL: jdbc:oracle:thin:@input_db:1521:xe
SYNTHESIZED_INPUT_USERNAME: test
SYNTHESIZED_INPUT_PASSWORD: test
SYNTHESIZED_OUTPUT_URL: jdbc:oracle:thin:@oracle_target:1521:xe
SYNTHESIZED_OUTPUT_URL: jdbc:oracle:thin:@output_db:1521:xe
SYNTHESIZED_OUTPUT_USERNAME: test
SYNTHESIZED_OUTPUT_PASSWORD: test
SYNTHESIZED_USERCONFIG_FILE: /app/config.yaml
Expand All @@ -82,7 +83,9 @@ services:
volumes:
- ./config.tdk.yaml:/app/config.yaml
- ./banner.txt:/app/banner.txt

depends_on:
databases:
condition: service_completed_successfully

ora2pg:
image: georgmoser/ora2pg
Expand Down Expand Up @@ -111,6 +114,13 @@ services:
- ./postgres_output_data/COPY_TEST_output.sql:/docker-entrypoint-initdb.d/4.sql
ports: [ "5432:5432" ]

check:
container_name: check
image: sodadata/soda-core
networks:
- simple-network
volumes:
- ./soda:/sodacl

networks:
simple-network:
2 changes: 1 addition & 1 deletion oracle/ora2pg.conf
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# PG_VERSION 11

ORACLE_DSN dbi:Oracle:host=oracle_target;sid=xe;port=1521
ORACLE_DSN dbi:Oracle:host=input_db;sid=xe;port=1521
ORACLE_USER test
ORACLE_PWD test

Expand Down
47 changes: 47 additions & 0 deletions oracle/soda/checks_for_input_db.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Row-count checks for the input data: each table must contain exactly the
# number of rows listed below.
checks for actor:
  - row_count = 200

checks for address:
  - row_count = 603

checks for category:
  - row_count = 16

checks for city:
  - row_count = 600

checks for country:
  - row_count = 109

checks for customer:
  - row_count = 599

checks for film:
  - row_count = 1000
  # Disabled distribution check, kept for reference:
  # - distribution_difference(rating) < 0.01:
  #     method: chi_square
  #     distribution reference file: /sodacl/film_rating_distribution.yml

checks for film_actor:
  - row_count = 5462

checks for film_category:
  - row_count = 1000

checks for inventory:
  - row_count = 4581

checks for language:
  - row_count = 6

checks for payment:
  - row_count = 16049

checks for rental:
  - row_count = 16044

checks for staff:
  - row_count = 2

checks for store:
  - row_count = 2
47 changes: 47 additions & 0 deletions oracle/soda/checks_for_masking.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Checks for the masked output: every table must keep the same row count as
# the table of the same name in the `input_db` data source.
checks for staff:
  - row_count same as staff in input_db

checks for film:
  - row_count same as film in input_db
  # `method` and `distribution reference file` configure this check, so they
  # are nested beneath it rather than placed at the top level.
  - distribution_difference(rating) = 0.0:
      method: swd
      distribution reference file: /sodacl/masked_film_rating_distribution.yml

checks for actor:
  - row_count same as actor in input_db

checks for address:
  - row_count same as address in input_db

checks for category:
  - row_count same as category in input_db

checks for city:
  - row_count same as city in input_db

checks for country:
  - row_count same as country in input_db

checks for customer:
  - row_count same as customer in input_db

checks for film_actor:
  - row_count same as film_actor in input_db

checks for film_category:
  - row_count same as film_category in input_db

checks for inventory:
  - row_count same as inventory in input_db

checks for language:
  - row_count same as language in input_db

checks for payment:
  - row_count same as payment in input_db

checks for rental:
  - row_count same as rental in input_db

checks for store:
  - row_count same as store in input_db
Loading

0 comments on commit 509d292

Please sign in to comment.