-
Notifications
You must be signed in to change notification settings - Fork 53
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[DRAFT] Build Enriched Traces & Transactions + Aggregation Tables #1161
base: main
Are you sure you want to change the base?
Changes from 29 commits
c4353f0
5ff10a9
a2eade6
ee2d62d
ebeb470
cc12c72
443e6c4
b33ee03
dd03a2a
ccc8ed9
563ad68
365ee57
ae2f95c
6503ab7
e4a2c77
a785060
7905ab0
6592682
1617cba
09cfe79
012bed3
379100c
9d09853
a185a85
0fc755f
e705cac
28d7b5f
61cab69
ae7b7db
dd00b26
b11df41
7112e82
8164f9a
6e03596
45ed343
9f147e2
afeb39d
f6da4d4
0633750
e2e57ae
d634da1
70b34f5
2190036
a49845e
0489465
9226dde
cbea965
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
Large diffs are not rendered by default.
This file was deleted.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# import duckdb | ||
|
||
# from op_analytics.datapipeline.models.compute.querybuilder import TemplatedSQLQuery | ||
# from op_analytics.datapipeline.models.compute.registry import register_model | ||
# from op_analytics.datapipeline.models.compute.types import NamedRelations | ||
|
||
|
||
# @register_model( | ||
# input_datasets=["intermediate/refined_transactions_fees_v1"], | ||
# expected_outputs=["summary_v1"], | ||
# auxiliary_views=[ | ||
# TemplatedSQLQuery( | ||
# template_name="daily_address_summary", | ||
# context={}, | ||
# ), | ||
# ], | ||
# ) | ||
# def daily_address_summary_old(duckdb_client: duckdb.DuckDBPyConnection) -> NamedRelations: | ||
# return { | ||
# "summary_v1": duckdb_client.view("daily_address_summary"), | ||
# } |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
import duckdb | ||
|
||
from op_analytics.datapipeline.models.compute.querybuilder import TemplatedSQLQuery | ||
from op_analytics.datapipeline.models.compute.registry import register_model | ||
from op_analytics.datapipeline.models.compute.types import NamedRelations | ||
|
||
|
||
@register_model(
    input_datasets=["intermediate/enriched_transactions_v1"],
    expected_outputs=["daily_transactions_fees_by_to_v1"],
    # TODO: Uncomment if we do this as a view (or some element as a view)
    # auxiliary_views=[
    #     TemplatedSQLQuery(
    #         template_name="daily_transactions_fees_by_to",
    #         context={},
    #     ),
    # ],
)
def daily_transactions_fees_by_to(duckdb_client: duckdb.DuckDBPyConnection) -> NamedRelations:
    """Placeholder model for the daily transaction-fee aggregation keyed by ``to``.

    The model is registered with ``intermediate/enriched_transactions_v1`` as
    its input and ``daily_transactions_fees_by_to_v1`` as its single expected
    output, but the aggregation query has not been written yet.

    Args:
        duckdb_client: DuckDB connection supplied by the model runner.

    Raises:
        NotImplementedError: always, until the aggregation is implemented.
    """
    # The previous draft passed the placeholder text "TODO: AGGREGATION CODE"
    # to duckdb_client.view(), which looks up a view by name and therefore
    # could only fail with an opaque lookup error. Fail explicitly instead.
    raise NotImplementedError(
        "daily_transactions_fees_by_to: aggregation for "
        "'daily_transactions_fees_by_to_v1' is not implemented yet"
    )
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
# # TO DEPRECATE? | ||
|
||
# import duckdb | ||
|
||
# from op_analytics.datapipeline.models.compute.querybuilder import TemplatedSQLQuery | ||
# from op_analytics.datapipeline.models.compute.registry import register_model | ||
# from op_analytics.datapipeline.models.compute.types import NamedRelations | ||
|
||
|
||
# @register_model( | ||
# input_datasets=["ingestion/logs_v1", "ingestion/transactions_v1", "ingestion/blocks_v1"], | ||
# expected_outputs=["event_emitting_transactions_v1"], | ||
# auxiliary_views=[ | ||
# TemplatedSQLQuery( | ||
# template_name="refined_transactions_fees", | ||
# context={}, | ||
# ), | ||
# TemplatedSQLQuery( | ||
# template_name="logs_topic0_filters", | ||
# context={}, | ||
# ), | ||
# TemplatedSQLQuery( | ||
# template_name="event_emitting_transactions", | ||
# context={}, | ||
# ), | ||
# ], | ||
# ) | ||
# def event_emitting_transactions(duckdb_client: duckdb.DuckDBPyConnection) -> NamedRelations: | ||
# return { | ||
# "event_emitting_transactions_v1": duckdb_client.view("event_emitting_transactions"), | ||
# } |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# import duckdb | ||
|
||
# from op_analytics.datapipeline.models.compute.querybuilder import TemplatedSQLQuery | ||
# from op_analytics.datapipeline.models.compute.registry import register_model | ||
# from op_analytics.datapipeline.models.compute.types import NamedRelations | ||
|
||
|
||
# @register_model( | ||
# input_datasets=["ingestion/traces_v1", "refined_transactions_fees_v1"], | ||
# expected_outputs=["refined_trace_calls_v1"], | ||
# auxiliary_views=[ | ||
# TemplatedSQLQuery( | ||
# template_name="refined_trace_calls", | ||
# context={}, | ||
# ), | ||
# ], | ||
# ) | ||
# def refined_trace_calls(duckdb_client: duckdb.DuckDBPyConnection) -> NamedRelations: | ||
# return { | ||
# "refined_trace_calls_v1": duckdb_client.view("refined_trace_calls"), | ||
# } |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
import duckdb | ||
|
||
from op_analytics.datapipeline.models.compute.querybuilder import TemplatedSQLQuery | ||
from op_analytics.datapipeline.models.compute.registry import register_model | ||
from op_analytics.datapipeline.models.compute.types import NamedRelations | ||
|
||
|
||
@register_model(
    input_datasets=[
        "ingestion/transactions_v1",
        "ingestion/blocks_v1",
        "ingestion/logs_v1",
        "ingestion/traces_v1",
    ],
    # Every key returned by the model function must be declared here; the
    # three trace-call aggregation outputs were previously returned but not
    # declared.
    expected_outputs=[
        "refined_transactions_fees_v1",
        "refined_trace_calls_v1",
        "event_emitting_transactions_v1",
        "summary_v1",
        "refined_trace_calls_agg_from_to_hash_v1",
        "refined_trace_calls_agg_to_hash_v1",
        "daily_trace_calls_agg_to_v1",
    ],
    # NOTE(review): order is kept exactly as written; presumably later
    # templates read from views created by earlier ones -- confirm before
    # reordering.
    auxiliary_views=[
        TemplatedSQLQuery(
            template_name="refined_transactions_fees",
            context={},
        ),
        TemplatedSQLQuery(
            template_name="refined_trace_calls",
            context={},
        ),
        TemplatedSQLQuery(
            template_name="logs_topic0_filters",
            context={},
        ),
        TemplatedSQLQuery(
            template_name="event_emitting_transactions",
            context={},
        ),
        TemplatedSQLQuery(
            template_name="daily_address_summary",
            context={},
        ),
        TemplatedSQLQuery(
            template_name="refined_trace_calls_agg_from_to_hash",
            context={},
        ),
        TemplatedSQLQuery(
            template_name="refined_trace_calls_agg_to_hash",
            context={},
        ),
        TemplatedSQLQuery(
            template_name="daily_trace_calls_agg_to",
            context={},
        ),
    ],
)
def refined_transactions_traces_address_models(
    duckdb_client: duckdb.DuckDBPyConnection,
) -> NamedRelations:
    """Expose the refined transaction, trace and address views as named outputs.

    Each output dataset name is mapped to the DuckDB view of the corresponding
    auxiliary template. ``logs_topic0_filters`` is declared as an auxiliary
    view but is not returned as an output.

    Args:
        duckdb_client: DuckDB connection on which the views are looked up.

    Returns:
        Mapping from output dataset name (``*_v1``) to the relation returned
        by ``duckdb_client.view`` for the matching view name.
    """
    return {
        "refined_transactions_fees_v1": duckdb_client.view("refined_transactions_fees"),
        "refined_trace_calls_v1": duckdb_client.view("refined_trace_calls"),
        "event_emitting_transactions_v1": duckdb_client.view("event_emitting_transactions"),
        # NOTE(review): output name does not follow the view name
        # ("daily_address_summary"); a rename was suggested in review.
        "summary_v1": duckdb_client.view("daily_address_summary"),
        "refined_trace_calls_agg_from_to_hash_v1": duckdb_client.view(
            "refined_trace_calls_agg_from_to_hash"
        ),
        "refined_trace_calls_agg_to_hash_v1": duckdb_client.view("refined_trace_calls_agg_to_hash"),
        "daily_trace_calls_agg_to_v1": duckdb_client.view("daily_trace_calls_agg_to"),
    }
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,6 +15,9 @@ def create_duckdb_macros(duckdb_client: duckdb.DuckDBPyConnection): | |
|
||
CREATE OR REPLACE MACRO wei_to_gwei(a) | ||
AS a::DECIMAL(28, 0) * 0.000000001::DECIMAL(10, 10); | ||
|
||
CREATE OR REPLACE MACRO gwei_to_eth(a) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Modified the decimal precision here; the previous cast did not allow `a` to be a decimal value. |
||
AS a::DECIMAL(28, 10) * 0.000000001::DECIMAL(10, 10); | ||
|
||
CREATE OR REPLACE MACRO safe_div(a, b) AS | ||
IF(b = 0, NULL, a / b); | ||
|
@@ -27,14 +30,34 @@ def create_duckdb_macros(duckdb_client: duckdb.DuckDBPyConnection): | |
-- Truncate a timestamp to hour. | ||
CREATE OR REPLACE MACRO epoch_to_hour(a) AS | ||
date_trunc('hour', make_timestamp(a * 1000000::BIGINT)); | ||
|
||
-- Truncate a timestamp to day. | ||
CREATE OR REPLACE MACRO epoch_to_day(a) AS | ||
date_trunc('day', make_timestamp(a * 1000000::BIGINT)); | ||
|
||
-- Division by 16 for DECIMAL types. | ||
CREATE OR REPLACE MACRO div16(a) | ||
AS a * 0.0625::DECIMAL(5, 5); | ||
|
||
--Get the length in bytes for binary data that is encoded as a hex string | ||
CREATE OR REPLACE MACRO hexstr_bytelen(x) | ||
AS (length(x) - 2) / 2 | ||
AS (length(x) - 2) / 2; | ||
|
||
--Count non-zero bytes for binary data that is encoded as a hex string. We don't use hexstr_bytelen because we need to substring the input data. | ||
CREATE OR REPLACE MACRO hexstr_nonzero_bytes(x) | ||
AS length(replace(hex(unhex(substr(x, 3))), '00', '')) / 2; | ||
|
||
--Count zero bytes for binary data that is encoded as a hex string | ||
CREATE OR REPLACE MACRO hexstr_zero_bytes(x) | ||
AS hexstr_bytelen(x) - hexstr_nonzero_bytes(x); | ||
|
||
--Calculate calldata gas used for binary data that is encoded as a hex string (can be updated by an EIP) | ||
CREATE OR REPLACE MACRO hexstr_calldata_gas(x) | ||
AS 16*hexstr_nonzero_bytes(x) + 4*hexstr_zero_bytes(x); | ||
|
||
--Get the method id for input data. This is the first 4 bytes, or first 10 string characters for binary data that is encoded as a hex string. | ||
CREATE OR REPLACE MACRO hexstr_method_id(x) | ||
AS substring(x,1,10) | ||
""") | ||
|
||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,6 +2,7 @@ SELECT | |
dt, | ||
chain, | ||
chain_id, | ||
network, | ||
from_address AS address, | ||
-- Aggregates | ||
|
||
|
@@ -43,9 +44,9 @@ SELECT | |
|
||
sum(if(success, receipt_gas_used, 0)) AS success_l2_gas_used_sum, | ||
|
||
sum(l1_gas_used) AS l1_gas_used_sum, | ||
sum(l1_gas_used_unified) AS l1_gas_used_unified_sum, | ||
|
||
sum(if(success, l1_gas_used, 0)) AS success_l1_gas_used_sum, | ||
sum(if(success, l1_gas_used_unified, 0)) AS success_l1_gas_used_unified_sum, | ||
|
||
wei_to_eth(sum(tx_fee)) AS tx_fee_sum_eth, | ||
|
||
|
@@ -58,7 +59,7 @@ SELECT | |
|
||
wei_to_eth(sum(l2_priority_fee)) AS l2_priority_fee_sum_eth, | ||
|
||
wei_to_eth(sum(l2_base_legacy)) AS l2_base_legacy_fee_sum_eth, | ||
wei_to_eth(sum(l2_legacy_extra_fee)) AS l2_base_legacy_fee_sum_eth, | ||
|
||
-- L1 Fee and breakdown into BASE + BLOB | ||
wei_to_eth(sum(l1_fee)) AS l1_fee_sum_eth, | ||
|
@@ -82,11 +83,29 @@ SELECT | |
) AS l1_base_price_avg_gwei, | ||
|
||
wei_to_gwei(safe_div(sum(l1_blob_fee), sum(l1_blob_scaled_size))) | ||
AS l1_blob_fee_avg_gwei | ||
AS l1_blob_fee_avg_gwei, | ||
|
||
-- Data Processed | ||
sum(input_zero_bytes) AS input_zero_bytes_sum, | ||
sum(if(success, input_zero_bytes, 0)) AS success_input_zero_bytes_sum, | ||
|
||
sum(input_nonzero_bytes) AS input_nonzero_bytes_sum, | ||
sum(if(success, input_nonzero_bytes, 0)) AS success_input_nonzero_bytes_sum, | ||
|
||
sum(input_byte_length) AS input_byte_length_sum, | ||
sum(if(success, input_byte_length, 0)) AS success_input_byte_length_sum, | ||
|
||
sum(estimated_size) AS estimated_size_sum, | ||
sum(if(success, estimated_size, 0)) AS success_estimated_size_sum | ||
|
||
FROM | ||
transaction_fees | ||
refined_transactions_fees | ||
WHERE | ||
NOT is_system_transaction | ||
AND gas_price > 0 | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This filter used to be applied at the transactions_fees level, but I removed it there (so that those transactions are preserved) and moved it here. |
||
GROUP BY | ||
1, | ||
2, | ||
3, | ||
4 | ||
4, | ||
5 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should we rename this to `daily_address_summary_v1`? Maybe in a future PR, to keep this one clean-ish.