Skip to content

Commit

Permalink
transform pull request - switch to single column PK
Browse files Browse the repository at this point in the history
Vertica does not support multi-column primary keys in incremental loads.

I created an issue for that:
vertica/dbt-vertica#109
  • Loading branch information
jaceksan committed Oct 3, 2023
1 parent e600747 commit 9e1f017
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 7 deletions.
10 changes: 7 additions & 3 deletions data_pipeline/models/github/pull_requests/pull_requests.sql
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
{'columns': ['created_at'], 'unique': false}
],
materialized='incremental',
unique_key=['pull_request_number', 'repo_id']
unique_key='id'
) }}

with using_clause as (
Expand All @@ -22,7 +22,7 @@ updates as (
select *
from using_clause
{% if is_incremental() %}
where (pull_request_number, repo_id) in ( select pull_request_number, repo_id from {{ this }} )
where id in ( select id from {{ this }} )
{% else %}
-- No updates when doing full load
where 1 = 0
Expand All @@ -33,7 +33,7 @@ inserts as (
select *
from using_clause
{% if is_incremental() %}
where (pull_request_number, repo_id) not in ( select pull_request_number, repo_id from {{ this }} )
where id not in ( select id from {{ this }} )
{% endif %}
),

Expand All @@ -47,6 +47,10 @@ repos as (

final as (
select
-- "id" must be selected here because it is the unique_key used for incremental processing.
-- It is not defined in schema.yml and is not exposed to GoodData.
id,
-- replace useless internal PR ID with something meaningful
repos.repo_name || '/' || p.pull_request_number as pull_request_id,
p.pull_request_number,
p.pull_request_url,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,15 @@
{'columns': ['created_at'], 'unique': false}
],
materialized='incremental',
unique_key=['pull_request_number', 'repo_id']
unique_key='id'
) }}

-- Helper step: materialize the extracted JSON fields first, then JOIN them with other tables.
-- This model runs in incremental mode.

with using_clause as (
select
id,
number as pull_request_number,
html_url as pull_request_url,
title as pull_request_title,
Expand All @@ -24,7 +25,6 @@ with using_clause as (
merged_at,
closed_at,
{{ extract_json_value('user', 'id', 'user_id', 'INT') }}
--CAST(json_extract_path_text(to_json("{{ get_db_entity_name('user') }}"), 'id') as INT) as user_id
from {{ var("input_schema_github") }}.pull_requests
{% if is_incremental() %}
where created_at > ( select max(created_at) from {{ this }} )
Expand All @@ -35,7 +35,7 @@ updates as (
select *
from using_clause
{% if is_incremental() %}
where (pull_request_number, repo_id) in ( select pull_request_number, repo_id from {{ this }} )
where id in ( select id from {{ this }} )
{% else %}
-- No updates when doing full load
where 1 = 0
Expand All @@ -46,7 +46,7 @@ inserts as (
select *
from using_clause
{% if is_incremental() %}
where (pull_request_number, repo_id) not in ( select pull_request_number, repo_id from {{ this }} )
where id not in ( select id from {{ this }} )
{% endif %}
),

Expand Down

0 comments on commit 9e1f017

Please sign in to comment.