diff --git a/etl-pipeline/data_exporters/export_titanic_clean.py b/etl-pipeline/data_exporters/export_titanic_clean.py deleted file mode 100755 index cb7aa63..0000000 --- a/etl-pipeline/data_exporters/export_titanic_clean.py +++ /dev/null @@ -1,16 +0,0 @@ -from mage_ai.io.file import FileIO -from pandas import DataFrame - -if 'data_exporter' not in globals(): - from mage_ai.data_preparation.decorators import data_exporter - - -@data_exporter -def export_data_to_file(df: DataFrame, **kwargs) -> None: - """ - Template for exporting data to filesystem. - - Docs: https://docs.mage.ai/design/data-loading#example-loading-data-from-a-file - """ - filepath = 'titanic_clean.csv' - FileIO().export(df, filepath) diff --git a/etl-pipeline/data_exporters/precise_frost.py b/etl-pipeline/data_exporters/precise_frost.py deleted file mode 100644 index 3765390..0000000 --- a/etl-pipeline/data_exporters/precise_frost.py +++ /dev/null @@ -1,31 +0,0 @@ -from mage_ai.settings.repo import get_repo_path -from mage_ai.io.config import ConfigFileLoader -from mage_ai.io.postgres import Postgres -from pandas import DataFrame -from os import path - -if 'data_exporter' not in globals(): - from mage_ai.data_preparation.decorators import data_exporter - - -@data_exporter -def export_data_to_postgres(df: DataFrame, **kwargs) -> None: - """ - Template for exporting data to a PostgreSQL database. - Specify your configuration settings in 'io_config.yaml'. - - Docs: https://docs.mage.ai/design/data-loading#postgresql - """ - schema_name = 'public' # Specify the name of the schema to export data to - table_name = 'station_readings' # Specify the name of the table to export data to - config_path = path.join(get_repo_path(), 'io_config.yaml') - config_profile = 'default' - - with Postgres.with_config(ConfigFileLoader(config_path, config_profile)) as loader: - loader.export( - df, - schema_name, - table_name, - index=False, # Specifies whether to include index in exported table - if_exists='append', # Specify resolution policy if table name already exists - ) \ No newline at end of file diff --git a/etl-pipeline/transformers/collect_station_readings.py b/etl-pipeline/transformers/collect_station_readings.py deleted file mode 100644 index 3c1c474..0000000 --- a/etl-pipeline/transformers/collect_station_readings.py +++ /dev/null @@ -1,24 +0,0 @@ -import pandas as pd - -if 'transformer' not in globals(): - from mage_ai.data_preparation.decorators import transformer -if 'test' not in globals(): - from mage_ai.data_preparation.decorators import test - - -@transformer -def transform(data, *args, **kwargs): - rows = [] - - for item in data: - if item is not None: - rows.append(item[0]['station_id']) - - return pd.DataFrame(rows) - -@test -def test_output(output, *args) -> None: - """ - Template code for testing the output of the block. - """ - assert output is not None, 'The output is undefined' diff --git a/etl-pipeline/transformers/extract_last_measurement_id.sql b/etl-pipeline/transformers/extract_last_measurement_id.sql deleted file mode 100644 index c313800..0000000 --- a/etl-pipeline/transformers/extract_last_measurement_id.sql +++ /dev/null @@ -1,21 +0,0 @@ -WITH last_measurement AS ( - SELECT - max(measurement_id) as last_measurement_id - FROM - station_readings_raw - WHERE - id = '{{ block_output("extract_station_ids")["id"] }}' -) -SELECT - '{{ block_output("extract_station_ids")["id"] }}' as id, - CASE - WHEN last_measurement_id IS NULL THEN floor(random() * 9 + 1)::int - ELSE last_measurement_id - END as last_measurement_id -FROM - last_measurement; - --- SELECT --- max(measurement_id) as last_measurement_id --- FROM station_readings_raw --- WHERE id = '{{ block_output("extract_station_ids")[0]["id"] }}' \ No newline at end of file diff --git a/etl-pipeline/transformers/extract_latest_records_from_fiuna.sql b/etl-pipeline/transformers/extract_latest_records_from_fiuna.sql deleted file mode 100644 index 80f384e..0000000 --- a/etl-pipeline/transformers/extract_latest_records_from_fiuna.sql +++ /dev/null @@ -1,3 +0,0 @@ -SELECT - '{{ block_output("extract_last_measurement_id") }}' ---'{{}}' \ No newline at end of file diff --git a/etl-pipeline/transformers/extract_station_readings.py b/etl-pipeline/transformers/extract_station_readings.py deleted file mode 100644 index 90c1b30..0000000 --- a/etl-pipeline/transformers/extract_station_readings.py +++ /dev/null @@ -1,32 +0,0 @@ -if 'transformer' not in globals(): - from mage_ai.data_preparation.decorators import transformer -if 'test' not in globals(): - from mage_ai.data_preparation.decorators import test - - -@transformer -def transform(data, *args, **kwargs): - """ - Template code for a transformer block. - - Add more parameters to this function if this block has multiple parent blocks. - There should be one parameter for each output variable from each parent block. - - Args: - data: The output from the upstream parent block - args: The output from any additional upstream blocks (if applicable) - - Returns: - Anything (e.g. data frame, dictionary, array, int, str, etc.) - """ - # Specify your transformation logic here - - return data - - -@test -def test_output(output, *args) -> None: - """ - Template code for testing the output of the block. - """ - assert output is not None, 'The output is undefined' diff --git a/etl-pipeline/transformers/extract_station_readings.sql b/etl-pipeline/transformers/extract_station_readings.sql deleted file mode 100644 index aaeb0fd..0000000 --- a/etl-pipeline/transformers/extract_station_readings.sql +++ /dev/null @@ -1,2 +0,0 @@ -SELECT - '{{ block_output(parse=lambda data, _vars: data[0]["id"]) }}' AS station_id diff --git a/etl-pipeline/transformers/fill_in_missing_values.py b/etl-pipeline/transformers/fill_in_missing_values.py deleted file mode 100755 index b9761c3..0000000 --- a/etl-pipeline/transformers/fill_in_missing_values.py +++ /dev/null @@ -1,45 +0,0 @@ -from pandas import DataFrame -import math - -if 'transformer' not in globals(): - from mage_ai.data_preparation.decorators import transformer -if 'test' not in globals(): - from mage_ai.data_preparation.decorators import test - -def select_number_columns(df: DataFrame) -> DataFrame: - return df[['Age', 'Fare', 'Parch', 'Pclass', 'SibSp', 'Survived']] - - -def fill_missing_values_with_median(df: DataFrame) -> DataFrame: - for col in df.columns: - values = sorted(df[col].dropna().tolist()) - median_value = values[math.floor(len(values) / 2)] - df[[col]] = df[[col]].fillna(median_value) - return df - - -@transformer -def transform_df(df: DataFrame, *args, **kwargs) -> DataFrame: - """ - Template code for a transformer block. - - Add more parameters to this function if this block has multiple parent blocks. - There should be one parameter for each output variable from each parent block. - - Args: - df (DataFrame): Data frame from parent block. - - Returns: - DataFrame: Transformed data frame - """ - # Specify your transformation logic here - - return fill_missing_values_with_median(select_number_columns(df)) - - -@test -def test_output(df) -> None: - """ - Template code for testing the output of the block. - """ - assert df is not None, 'The output is undefined' diff --git a/etl-pipeline/transformers/iridescent_familiar.py b/etl-pipeline/transformers/iridescent_familiar.py deleted file mode 100644 index dc4565d..0000000 --- a/etl-pipeline/transformers/iridescent_familiar.py +++ /dev/null @@ -1,17 +0,0 @@ -if 'transformer' not in globals(): - from mage_ai.data_preparation.decorators import transformer -if 'test' not in globals(): - from mage_ai.data_preparation.decorators import test - - -@transformer -def transform(data, *args, **kwargs): - return data - - -@test -def test_output(output, *args) -> None: - """ - Template code for testing the output of the block. - """ - assert output is not None, 'The output is undefined' diff --git a/etl-pipeline/transformers/morning_phoenix.py b/etl-pipeline/transformers/morning_phoenix.py deleted file mode 100644 index 38e18d5..0000000 --- a/etl-pipeline/transformers/morning_phoenix.py +++ /dev/null @@ -1,32 +0,0 @@ -if 'transformer' not in globals(): - from mage_ai.data_preparation.decorators import transformer -if 'test' not in globals(): - from mage_ai.data_preparation.decorators import test - - -@transformer -def transform(data, *args, **kwargs): - """ - Template code for a transformer block. - - Add more parameters to this function if this block has multiple parent blocks. - There should be one parameter for each output variable from each parent block. - - Args: - data: The output from the upstream parent block - args: The output from any additional upstream blocks (if applicable) - - Returns: - Anything (e.g. data frame, dictionary, array, int, str, etc.) - """ - # Specify your transformation logic here - - return data - - -@test -def test_output(output, *args) -> None: - """ - Template code for testing the output of the block. - """ - assert output is not None, 'The output is undefined' \ No newline at end of file diff --git a/etl-pipeline/transformers/renewed_spellcaster.py b/etl-pipeline/transformers/renewed_spellcaster.py deleted file mode 100644 index 65577a3..0000000 --- a/etl-pipeline/transformers/renewed_spellcaster.py +++ /dev/null @@ -1,19 +0,0 @@ -from typing import Dict, List - -if 'transformer' not in globals(): - from mage_ai.data_preparation.decorators import transformer -if 'test' not in globals(): - from mage_ai.data_preparation.decorators import test - - -@transformer -def transform(data: Dict, *args, **kwargs) -> List[Dict]: - return [data] - - -@test -def test_output(output, *args) -> None: - """ - Template code for testing the output of the block. - """ - assert output is not None, 'The output is undefined' diff --git a/etl-pipeline/transformers/wise_herbalist.py b/etl-pipeline/transformers/wise_herbalist.py deleted file mode 100644 index 57bcdeb..0000000 --- a/etl-pipeline/transformers/wise_herbalist.py +++ /dev/null @@ -1,19 +0,0 @@ -from typing import List, Dict - -if 'transformer' not in globals(): - from mage_ai.data_preparation.decorators import transformer -if 'test' not in globals(): - from mage_ai.data_preparation.decorators import test - - -@transformer -def transform(data: Dict, *args, **kwargs) -> List[Dict]: - return [data] - - -@test -def test_output(output, *args) -> None: - """ - Template code for testing the output of the block. - """ - assert output is not None, 'The output is undefined'