From 078cfeef306a0ab2b587fccc50d5661c3029c9b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20Dub=C3=A9?= Date: Mon, 24 Jun 2024 15:46:13 +0000 Subject: [PATCH 1/2] Send bytes as base64, remove bits (#1866) Code assumed bit was a single bit in places, sent bytes as binary which was then base64 decoded, or converted to ASCII bytes of binary digits --- .../bigquery/merge_stmt_generator.go | 2 +- flow/connectors/bigquery/qvalue_convert.go | 2 +- .../postgres/normalize_stmt_generator.go | 33 +++++++++++-------- flow/connectors/postgres/qvalue_convert.go | 14 ++------ .../postgres/schema_delta_test_constants.go | 6 ---- .../snowflake/avro_file_writer_test.go | 3 -- .../snowflake/merge_stmt_generator.go | 2 +- flow/connectors/sql/query_executor.go | 6 +--- flow/connectors/sqlserver/qvalue_convert.go | 2 -- flow/connectors/utils/cdc_store.go | 1 - flow/e2e/bigquery/bigquery_helper.go | 2 +- flow/e2e/postgres/peer_flow_pg_test.go | 4 +-- flow/model/qrecord_copy_from_source.go | 2 -- flow/model/qvalue/avro_converter.go | 4 +-- flow/model/qvalue/equals.go | 2 -- flow/model/qvalue/kind.go | 3 -- flow/model/qvalue/qvalue.go | 16 --------- flow/model/record_items.go | 17 ---------- flow/pua/peerdb.go | 2 -- 19 files changed, 30 insertions(+), 93 deletions(-) diff --git a/flow/connectors/bigquery/merge_stmt_generator.go b/flow/connectors/bigquery/merge_stmt_generator.go index b2419cbae0..163d53ae6b 100644 --- a/flow/connectors/bigquery/merge_stmt_generator.go +++ b/flow/connectors/bigquery/merge_stmt_generator.go @@ -39,7 +39,7 @@ func (m *mergeStmtGenerator) generateFlattenedCTE(dstTable string, normalizedTab castStmt = fmt.Sprintf("CAST(PARSE_JSON(JSON_VALUE(_peerdb_data, '$.%s'),wide_number_mode=>'round') AS %s) AS `%s`", column.Name, bqTypeString, shortCol) // expecting data in BASE64 format - case qvalue.QValueKindBytes, qvalue.QValueKindBit: + case qvalue.QValueKindBytes: castStmt = fmt.Sprintf("FROM_BASE64(JSON_VALUE(_peerdb_data,'$.%s')) AS `%s`", column.Name, shortCol) case qvalue.QValueKindArrayFloat32, qvalue.QValueKindArrayFloat64, qvalue.QValueKindArrayInt16, diff --git a/flow/connectors/bigquery/qvalue_convert.go b/flow/connectors/bigquery/qvalue_convert.go index 40595588db..ce2347a6c5 100644 --- a/flow/connectors/bigquery/qvalue_convert.go +++ b/flow/connectors/bigquery/qvalue_convert.go @@ -46,7 +46,7 @@ func qValueKindToBigQueryType(columnDescription *protos.FieldDescription) bigque bqField.Type = bigquery.TimeFieldType // TODO: https://github.com/PeerDB-io/peerdb/issues/189 - handle INTERVAL types again, // bytes - case qvalue.QValueKindBit, qvalue.QValueKindBytes: + case qvalue.QValueKindBytes: bqField.Type = bigquery.BytesFieldType case qvalue.QValueKindArrayInt16, qvalue.QValueKindArrayInt32, qvalue.QValueKindArrayInt64: bqField.Type = bigquery.IntegerFieldType diff --git a/flow/connectors/postgres/normalize_stmt_generator.go b/flow/connectors/postgres/normalize_stmt_generator.go index 49013b924d..ff33af6bef 100644 --- a/flow/connectors/postgres/normalize_stmt_generator.go +++ b/flow/connectors/postgres/normalize_stmt_generator.go @@ -42,6 +42,23 @@ func (n *normalizeStmtGenerator) columnTypeToPg(schema *protos.TableSchema, colu } } +func (n *normalizeStmtGenerator) generateExpr( + normalizedTableSchema *protos.TableSchema, + genericColumnType string, + stringCol string, + pgType string, +) string { + if normalizedTableSchema.System == protos.TypeSystem_Q { + qkind := qvalue.QValueKind(genericColumnType) + if qkind.IsArray() { + return fmt.Sprintf("ARRAY(SELECT 
JSON_ARRAY_ELEMENTS_TEXT((_peerdb_data->>%s)::JSON))::%s", stringCol, pgType) + } else if qkind == qvalue.QValueKindBytes { + return fmt.Sprintf("decode(_peerdb_data->>%s, 'base64')::%s", stringCol, pgType) + } + } + return fmt.Sprintf("(_peerdb_data->>%s)::%s", stringCol, pgType) +} + func (n *normalizeStmtGenerator) generateNormalizeStatements(dstTable string) []string { normalizedTableSchema := n.tableSchemaMapping[dstTable] if n.supportsMerge { @@ -70,12 +87,7 @@ func (n *normalizeStmtGenerator) generateFallbackStatements( stringCol := QuoteLiteral(column.Name) columnNames = append(columnNames, quotedCol) pgType := n.columnTypeToPg(normalizedTableSchema, genericColumnType) - var expr string - if normalizedTableSchema.System == protos.TypeSystem_Q && qvalue.QValueKind(genericColumnType).IsArray() { - expr = fmt.Sprintf("ARRAY(SELECT JSON_ARRAY_ELEMENTS_TEXT((_peerdb_data->>%s)::JSON))::%s", stringCol, pgType) - } else { - expr = fmt.Sprintf("(_peerdb_data->>%s)::%s", stringCol, pgType) - } + expr := n.generateExpr(normalizedTableSchema, genericColumnType, stringCol, pgType) flattenedCastsSQLArray = append(flattenedCastsSQLArray, fmt.Sprintf("%s AS %s", expr, quotedCol)) if slices.Contains(normalizedTableSchema.PrimaryKeyColumns, column.Name) { @@ -138,14 +150,9 @@ func (n *normalizeStmtGenerator) generateMergeStatement( quotedCol := QuoteIdentifier(column.Name) stringCol := QuoteLiteral(column.Name) quotedColumnNames[i] = quotedCol - pgType := n.columnTypeToPg(normalizedTableSchema, genericColumnType) - var expr string - if normalizedTableSchema.System == protos.TypeSystem_Q && qvalue.QValueKind(genericColumnType).IsArray() { - expr = fmt.Sprintf("ARRAY(SELECT JSON_ARRAY_ELEMENTS_TEXT((_peerdb_data->>%s)::JSON))::%s", stringCol, pgType) - } else { - expr = fmt.Sprintf("(_peerdb_data->>%s)::%s", stringCol, pgType) - } + expr := n.generateExpr(normalizedTableSchema, genericColumnType, stringCol, pgType) + flattenedCastsSQLArray = append(flattenedCastsSQLArray, fmt.Sprintf("%s AS %s", expr, quotedCol)) if slices.Contains(normalizedTableSchema.PrimaryKeyColumns, column.Name) { primaryKeyColumnCasts[column.Name] = fmt.Sprintf("(_peerdb_data->>%s)::%s", stringCol, pgType) diff --git a/flow/connectors/postgres/qvalue_convert.go b/flow/connectors/postgres/qvalue_convert.go index 246da6b49a..d359212bdb 100644 --- a/flow/connectors/postgres/qvalue_convert.go +++ b/flow/connectors/postgres/qvalue_convert.go @@ -82,8 +82,6 @@ func (c *PostgresConnector) postgresOIDToQValueKind(recvOID uint32) qvalue.QValu return qvalue.QValueKindTimestampTZ case pgtype.NumericOID: return qvalue.QValueKindNumeric - case pgtype.BitOID, pgtype.VarbitOID: - return qvalue.QValueKindBit case pgtype.Int2ArrayOID: return qvalue.QValueKindArrayInt16 case pgtype.Int4ArrayOID: @@ -179,8 +177,6 @@ func qValueKindToPostgresType(colTypeStr string) string { return "TIMESTAMPTZ" case qvalue.QValueKindNumeric: return "NUMERIC" - case qvalue.QValueKindBit: - return "BIT" case qvalue.QValueKindINET: return "INET" case qvalue.QValueKindCIDR: @@ -379,11 +375,6 @@ func parseFieldFromQValueKind(qvalueKind qvalue.QValueKind, value interface{}) ( case qvalue.QValueKindBytes: rawBytes := value.([]byte) return qvalue.QValueBytes{Val: rawBytes}, nil - case qvalue.QValueKindBit: - bitsVal := value.(pgtype.Bits) - if bitsVal.Valid { - return qvalue.QValueBit{Val: bitsVal.Bytes}, nil - } case qvalue.QValueKindNumeric: numVal := value.(pgtype.Numeric) if numVal.Valid { @@ -449,10 +440,9 @@ func parseFieldFromQValueKind(qvalueKind 
qvalue.QValueKind, value interface{}) ( } return qvalue.QValueArrayString{Val: a}, nil case qvalue.QValueKindPoint: - xCoord := value.(pgtype.Point).P.X - yCoord := value.(pgtype.Point).P.Y + coord := value.(pgtype.Point).P return qvalue.QValuePoint{ - Val: fmt.Sprintf("POINT(%f %f)", xCoord, yCoord), + Val: fmt.Sprintf("POINT(%f %f)", coord.X, coord.Y), }, nil default: textVal, ok := value.(string) diff --git a/flow/connectors/postgres/schema_delta_test_constants.go b/flow/connectors/postgres/schema_delta_test_constants.go index 6ded70625a..52c50db453 100644 --- a/flow/connectors/postgres/schema_delta_test_constants.go +++ b/flow/connectors/postgres/schema_delta_test_constants.go @@ -7,7 +7,6 @@ import ( var AddAllColumnTypes = []string{ string(qvalue.QValueKindInt32), - string(qvalue.QValueKindBit), string(qvalue.QValueKindBoolean), string(qvalue.QValueKindBytes), string(qvalue.QValueKindDate), @@ -32,11 +31,6 @@ var AddAllColumnTypesFields = []*protos.FieldDescription{ Type: string(qvalue.QValueKindInt32), TypeModifier: -1, }, - { - Name: "c1", - Type: string(qvalue.QValueKindBit), - TypeModifier: 1, - }, { Name: "c2", Type: string(qvalue.QValueKindBoolean), diff --git a/flow/connectors/snowflake/avro_file_writer_test.go b/flow/connectors/snowflake/avro_file_writer_test.go index a4d89bd773..ac6f253517 100644 --- a/flow/connectors/snowflake/avro_file_writer_test.go +++ b/flow/connectors/snowflake/avro_file_writer_test.go @@ -60,8 +60,6 @@ func createQValue(t *testing.T, kind qvalue.QValueKind, placeholder int) qvalue. // value = `{"key": "value"}` // placeholder JSON, replace with actual logic case qvalue.QValueKindBytes: return qvalue.QValueBytes{Val: []byte("sample bytes")} // placeholder bytes, replace with actual logic - case qvalue.QValueKindBit: - return qvalue.QValueBit{Val: []byte("sample bits")} // placeholder bytes, replace with actual logic default: require.Failf(t, "unsupported QValueKind", "unsupported QValueKind: %s", kind) return qvalue.QValueNull(kind) @@ -97,7 +95,6 @@ func generateRecords( qvalue.QValueKindUUID, qvalue.QValueKindQChar, // qvalue.QValueKindJSON, - qvalue.QValueKindBit, } numKinds := len(allQValueKinds) diff --git a/flow/connectors/snowflake/merge_stmt_generator.go b/flow/connectors/snowflake/merge_stmt_generator.go index 41ae4c31e9..3f0cfbc63a 100644 --- a/flow/connectors/snowflake/merge_stmt_generator.go +++ b/flow/connectors/snowflake/merge_stmt_generator.go @@ -41,7 +41,7 @@ func (m *mergeStmtGenerator) generateMergeStmt(dstTable string) (string, error) targetColumnName := SnowflakeIdentifierNormalize(column.Name) switch qvKind { - case qvalue.QValueKindBytes, qvalue.QValueKindBit: + case qvalue.QValueKindBytes: flattenedCastsSQLArray = append(flattenedCastsSQLArray, fmt.Sprintf("BASE64_DECODE_BINARY(%s:\"%s\") "+ "AS %s", toVariantColumnName, column.Name, targetColumnName)) case qvalue.QValueKindGeography: diff --git a/flow/connectors/sql/query_executor.go b/flow/connectors/sql/query_executor.go index 48ab5ce454..fb94280c38 100644 --- a/flow/connectors/sql/query_executor.go +++ b/flow/connectors/sql/query_executor.go @@ -221,7 +221,7 @@ func (g *GenericSQLQueryExecutor) processRows(rows *sqlx.Rows) (*model.QRecordBa case qvalue.QValueKindString, qvalue.QValueKindHStore: var s sql.NullString values[i] = &s - case qvalue.QValueKindBytes, qvalue.QValueKindBit: + case qvalue.QValueKindBytes: values[i] = new([]byte) case qvalue.QValueKindNumeric: var s sql.Null[decimal.Decimal] @@ -435,10 +435,6 @@ func toQValue(kind qvalue.QValueKind, val interface{}) 
(qvalue.QValue, error) { if v, ok := val.(*[]byte); ok && v != nil { return qvalue.QValueBytes{Val: *v}, nil } - case qvalue.QValueKindBit: - if v, ok := val.(*[]byte); ok && v != nil { - return qvalue.QValueBit{Val: *v}, nil - } case qvalue.QValueKindUUID: if v, ok := val.(*[]byte); ok && v != nil { diff --git a/flow/connectors/sqlserver/qvalue_convert.go b/flow/connectors/sqlserver/qvalue_convert.go index b4f73420e1..57f2d90fb9 100644 --- a/flow/connectors/sqlserver/qvalue_convert.go +++ b/flow/connectors/sqlserver/qvalue_convert.go @@ -17,7 +17,6 @@ var qValueKindToSQLServerTypeMap = map[qvalue.QValueKind]string{ qvalue.QValueKindTimestampTZ: "DATETIMEOFFSET", qvalue.QValueKindTime: "TIME", qvalue.QValueKindDate: "DATE", - qvalue.QValueKindBit: "BINARY", qvalue.QValueKindBytes: "VARBINARY(MAX)", qvalue.QValueKindStruct: "NTEXT", // SQL Server doesn't support struct type qvalue.QValueKindUUID: "UNIQUEIDENTIFIER", @@ -47,7 +46,6 @@ var sqlServerTypeToQValueKindMap = map[string]qvalue.QValueKind{ "TIME": qvalue.QValueKindTime, "DATE": qvalue.QValueKindDate, "VARBINARY(MAX)": qvalue.QValueKindBytes, - "BINARY": qvalue.QValueKindBit, "DECIMAL": qvalue.QValueKindNumeric, "UNIQUEIDENTIFIER": qvalue.QValueKindUUID, "SMALLINT": qvalue.QValueKindInt32, diff --git a/flow/connectors/utils/cdc_store.go b/flow/connectors/utils/cdc_store.go index be42064d77..6b36f73258 100644 --- a/flow/connectors/utils/cdc_store.go +++ b/flow/connectors/utils/cdc_store.go @@ -98,7 +98,6 @@ func init() { gob.Register(qvalue.QValueBytes{}) gob.Register(qvalue.QValueUUID{}) gob.Register(qvalue.QValueJSON{}) - gob.Register(qvalue.QValueBit{}) gob.Register(qvalue.QValueHStore{}) gob.Register(qvalue.QValueGeography{}) gob.Register(qvalue.QValueGeometry{}) diff --git a/flow/e2e/bigquery/bigquery_helper.go b/flow/e2e/bigquery/bigquery_helper.go index 9fd5691114..6138c036e0 100644 --- a/flow/e2e/bigquery/bigquery_helper.go +++ b/flow/e2e/bigquery/bigquery_helper.go @@ -432,7 +432,7 @@ func qValueKindToBqColTypeString(val qvalue.QValueKind) (string, error) { return "BOOL", nil case qvalue.QValueKindTimestamp: return "TIMESTAMP", nil - case qvalue.QValueKindBytes, qvalue.QValueKindBit: + case qvalue.QValueKindBytes: return "BYTES", nil case qvalue.QValueKindNumeric: return "NUMERIC", nil diff --git a/flow/e2e/postgres/peer_flow_pg_test.go b/flow/e2e/postgres/peer_flow_pg_test.go index 93d2f4de24..ec96420480 100644 --- a/flow/e2e/postgres/peer_flow_pg_test.go +++ b/flow/e2e/postgres/peer_flow_pg_test.go @@ -99,7 +99,7 @@ func (s PeerFlowE2ETestSuitePG) Test_Types_PG() { dstTableName := s.attachSchemaSuffix("test_types_pg_dst") _, err := s.Conn().Exec(context.Background(), fmt.Sprintf(` - CREATE TABLE IF NOT EXISTS %s (id serial PRIMARY KEY,c1 BIGINT,c2 BIT,c4 BOOLEAN, + CREATE TABLE IF NOT EXISTS %s (id serial PRIMARY KEY,c1 BIGINT,c2 BYTEA,c4 BOOLEAN, c7 CHARACTER,c8 varchar,c9 CIDR,c11 DATE,c12 FLOAT,c13 DOUBLE PRECISION, c14 INET,c15 INTEGER,c21 MACADDR, c29 SMALLINT,c32 TEXT, @@ -124,7 +124,7 @@ func (s PeerFlowE2ETestSuitePG) Test_Types_PG() { env := e2e.ExecutePeerflow(tc, peerflow.CDCFlowWorkflow, flowConnConfig, nil) e2e.SetupCDCFlowStatusQuery(s.t, env, flowConnConfig) _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` - INSERT INTO %s SELECT 2,2,b'1', + INSERT INTO %s SELECT 2,2,'\xdeadbeef', true,'s','test','1.1.10.2'::cidr, CURRENT_DATE,1.23,1.234,'192.168.1.5'::inet,1, '08:00:2b:01:02:03'::macaddr, diff --git a/flow/model/qrecord_copy_from_source.go b/flow/model/qrecord_copy_from_source.go index 
745827309c..308676c5f5 100644 --- a/flow/model/qrecord_copy_from_source.go +++ b/flow/model/qrecord_copy_from_source.go @@ -90,8 +90,6 @@ func (src *QRecordCopyFromSource) Values() ([]interface{}, error) { values[i] = uuid.UUID(v.Val) case qvalue.QValueNumeric: values[i] = v.Val - case qvalue.QValueBit: - values[i] = v.Val case qvalue.QValueBytes: values[i] = v.Val case qvalue.QValueDate: diff --git a/flow/model/qvalue/avro_converter.go b/flow/model/qvalue/avro_converter.go index 764fd86759..648a6aa7ac 100644 --- a/flow/model/qvalue/avro_converter.go +++ b/flow/model/qvalue/avro_converter.go @@ -95,7 +95,7 @@ func GetAvroSchemaFromQValueKind(kind QValueKind, targetDWH protos.DBType, preci return "double", nil case QValueKindBoolean: return "boolean", nil - case QValueKindBytes, QValueKindBit: + case QValueKindBytes: return "bytes", nil case QValueKindNumeric: avroNumericPrecision, avroNumericScale := DetermineNumericSettingForDWH(precision, scale, targetDWH) @@ -328,8 +328,6 @@ func QValueToAvro(value QValue, field *QField, targetDWH protos.DBType, logger l return c.processNumeric(v.Val), nil case QValueBytes: return c.processBytes(v.Val), nil - case QValueBit: - return c.processBytes(v.Val), nil case QValueJSON: return c.processJSON(v.Val), nil case QValueHStore: diff --git a/flow/model/qvalue/equals.go b/flow/model/qvalue/equals.go index 97ba7ca73f..a609c6df6a 100644 --- a/flow/model/qvalue/equals.go +++ b/flow/model/qvalue/equals.go @@ -76,8 +76,6 @@ func Equals(qv QValue, other QValue) bool { case QValueJSON: // TODO (kaushik): fix for tests return true - case QValueBit: - return compareBytes(qvValue, otherValue) case QValueGeometry: return compareGeometry(q.Val, otherValue) case QValueGeography: diff --git a/flow/model/qvalue/kind.go b/flow/model/qvalue/kind.go index fa0a3c2235..43e2495429 100644 --- a/flow/model/qvalue/kind.go +++ b/flow/model/qvalue/kind.go @@ -30,7 +30,6 @@ const ( QValueKindBytes QValueKind = "bytes" QValueKindUUID QValueKind = "uuid" QValueKindJSON QValueKind = "json" - QValueKindBit QValueKind = "bit" QValueKindHStore QValueKind = "hstore" QValueKindGeography QValueKind = "geography" QValueKindGeometry QValueKind = "geometry" @@ -75,7 +74,6 @@ var QValueKindToSnowflakeTypeMap = map[QValueKind]string{ QValueKindTime: "TIME", QValueKindTimeTZ: "TIME", QValueKindDate: "DATE", - QValueKindBit: "BINARY", QValueKindBytes: "BINARY", QValueKindStruct: "STRING", QValueKindUUID: "STRING", @@ -113,7 +111,6 @@ var QValueKindToClickhouseTypeMap = map[QValueKind]string{ QValueKindTimestampTZ: "DateTime64(6)", QValueKindTime: "String", QValueKindDate: "Date", - QValueKindBit: "Boolean", QValueKindBytes: "String", QValueKindStruct: "String", QValueKindUUID: "UUID", diff --git a/flow/model/qvalue/qvalue.go b/flow/model/qvalue/qvalue.go index 91b9e3fe31..9b1c13f755 100644 --- a/flow/model/qvalue/qvalue.go +++ b/flow/model/qvalue/qvalue.go @@ -358,22 +358,6 @@ func (v QValueJSON) LValue(ls *lua.LState) lua.LValue { return lua.LString(v.Val) } -type QValueBit struct { - Val []byte -} - -func (QValueBit) Kind() QValueKind { - return QValueKindBit -} - -func (v QValueBit) Value() any { - return v.Val -} - -func (v QValueBit) LValue(ls *lua.LState) lua.LValue { - return lua.LString(shared.UnsafeFastReadOnlyBytesToString(v.Val)) -} - type QValueHStore struct { Val string } diff --git a/flow/model/record_items.go b/flow/model/record_items.go index 55614183ab..daa8ab1aea 100644 --- a/flow/model/record_items.go +++ b/flow/model/record_items.go @@ -4,7 +4,6 @@ import ( "encoding/json" 
"fmt" "math" - "strings" "github.com/google/uuid" @@ -88,22 +87,6 @@ func (r RecordItems) toMap(opts ToJSONOptions) (map[string]interface{}, error) { } switch v := qv.(type) { - case qvalue.QValueBit: - // convert to binary string since json.Marshal stores byte arrays as base64 - var binStr strings.Builder - binStr.Grow(len(v.Val) * 8) - for _, b := range v.Val { - binStr.WriteString(fmt.Sprintf("%08b", b)) - } - jsonStruct[col] = binStr.String() - case qvalue.QValueBytes: - // convert to binary string since json.Marshal stores byte arrays as base64 - var binStr strings.Builder - binStr.Grow(len(v.Val) * 8) - for _, b := range v.Val { - binStr.WriteString(fmt.Sprintf("%08b", b)) - } - jsonStruct[col] = binStr.String() case qvalue.QValueUUID: jsonStruct[col] = uuid.UUID(v.Val) case qvalue.QValueQChar: diff --git a/flow/pua/peerdb.go b/flow/pua/peerdb.go index 9a96e3576b..341d878186 100644 --- a/flow/pua/peerdb.go +++ b/flow/pua/peerdb.go @@ -246,8 +246,6 @@ func LuaRowNewIndex(ls *lua.LState) int { } case qvalue.QValueKindJSON: newqv = qvalue.QValueJSON{Val: lua.LVAsString(val)} - case qvalue.QValueKindBit: - newqv = qvalue.QValueBit{Val: []byte(lua.LVAsString(val))} case qvalue.QValueKindArrayFloat32: if tbl, ok := val.(*lua.LTable); ok { newqv = qvalue.QValueArrayFloat32{ From 3e1ddefa4e9d2067792c12a7e8d6fa7ada6466d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20Dub=C3=A9?= Date: Mon, 24 Jun 2024 16:22:04 +0000 Subject: [PATCH 2/2] no peer config stored in temporal state (#1844) Also api uses peer names more too instead of passing configs around This will need two version bumps to sanitize: - on 0.13 user pauses their mirrors. This is important, failure to do so will break mirror when upgrading - after upgrading to 0.14 unpause mirror. Temporal data will be sanitized at this point - in 0.15 sanitization logic will be removed & protobufs can remove fields. 
Mirror should be paused in 0.14 to prevent replay using outdated protobufs --- flow/.golangci.yml | 2 +- flow/activities/flowable.go | 223 ++++++------------ flow/activities/flowable_core.go | 14 +- flow/activities/snapshot_activity.go | 25 +- flow/cmd/handler.go | 30 +-- flow/cmd/mirror_status.go | 43 +++- flow/cmd/snapshot_worker.go | 1 + flow/cmd/validate_mirror.go | 14 +- flow/cmd/validate_peer.go | 10 +- flow/connectors/core.go | 139 +++++++++-- flow/connectors/postgres/qrep.go | 15 +- flow/connectors/snowflake/qrep.go | 5 +- flow/connectors/snowflake/snowflake.go | 2 + flow/connectors/utils/peers.go | 38 ++- flow/e2e/bigquery/bigquery.go | 13 +- flow/e2e/bigquery/bigquery_helper.go | 24 +- flow/e2e/bigquery/peer_flow_bq_test.go | 159 ++++++------- flow/e2e/bigquery/qrep_flow_bq_test.go | 18 +- flow/e2e/congen.go | 66 ++---- flow/e2e/elasticsearch/elasticsearch.go | 46 ++-- flow/e2e/elasticsearch/peer_flow_es_test.go | 8 +- flow/e2e/elasticsearch/qrep_flow_es_test.go | 10 +- flow/e2e/eventhub/peer_flow_eh_test.go | 11 +- flow/e2e/generic/generic_test.go | 8 +- flow/e2e/kafka/kafka_test.go | 20 +- flow/e2e/postgres/peer_flow_pg_test.go | 97 ++++---- flow/e2e/postgres/postgres.go | 4 +- flow/e2e/postgres/qrep_flow_pg_test.go | 61 ++--- flow/e2e/pubsub/pubsub_test.go | 16 +- flow/e2e/s3/cdc_s3_test.go | 4 +- flow/e2e/s3/qrep_flow_s3_test.go | 26 +- flow/e2e/s3/s3_helper.go | 14 -- flow/e2e/snowflake/peer_flow_sf_test.go | 88 +++---- flow/e2e/snowflake/qrep_flow_sf_test.go | 54 ++--- flow/e2e/snowflake/snowflake.go | 13 +- flow/e2e/snowflake/snowflake_helper.go | 51 +--- .../snowflake/snowflake_schema_delta_test.go | 2 +- .../e2e/sqlserver/qrep_flow_sqlserver_test.go | 21 +- flow/e2e/sqlserver/sqlserver_helper.go | 19 +- flow/e2e/test_utils.go | 32 +-- flow/workflows/cdc_flow.go | 73 +++--- flow/workflows/local_activities.go | 22 ++ flow/workflows/qrep_flow.go | 67 ++++-- flow/workflows/setup_flow.go | 30 +-- flow/workflows/snapshot_flow.go | 86 ++++--- flow/workflows/sync_flow.go | 8 +- nexus/analyzer/src/lib.rs | 1 - nexus/catalog/src/lib.rs | 88 ++++--- nexus/flow-rs/src/grpc.rs | 23 +- nexus/server/src/main.rs | 33 +-- protos/flow.proto | 42 ++-- protos/peers.proto | 2 - protos/route.proto | 10 +- ui/app/api/alert-config/route.ts | 19 +- ui/app/api/mirrors/route.ts | 24 +- ui/app/api/peers/getTruePeer.ts | 47 +--- ui/app/api/peers/info/[peerName]/route.ts | 3 +- ui/app/api/peers/route.ts | 11 +- ui/app/dto/MirrorsDTO.ts | 12 + ui/app/dto/PeersDTO.ts | 2 + ui/app/mirrors/[mirrorId]/cdc.tsx | 2 +- ui/app/mirrors/[mirrorId]/cdcDetails.tsx | 31 ++- ui/app/mirrors/[mirrorId]/edit/page.tsx | 8 +- ui/app/mirrors/[mirrorId]/handlers.ts | 4 +- ui/app/mirrors/[mirrorId]/page.tsx | 2 +- ui/app/mirrors/create/cdc/cdc.tsx | 64 ++--- ui/app/mirrors/create/handlers.ts | 71 ++---- ui/app/mirrors/create/helpers/common.ts | 2 + ui/app/mirrors/create/page.tsx | 134 ++++++----- ui/app/mirrors/create/qrep/qrep.tsx | 55 +++-- ui/app/mirrors/create/schema.ts | 56 ++--- ui/app/mirrors/page.tsx | 31 +-- ui/app/mirrors/tables.tsx | 50 ++-- ui/app/peers/create/[peerType]/helpers/pg.ts | 1 - ui/components/DropDialog.tsx | 22 +- ui/components/ResyncDialog.tsx | 4 +- 76 files changed, 1241 insertions(+), 1345 deletions(-) diff --git a/flow/.golangci.yml b/flow/.golangci.yml index 4403f2bfda..684f3a93a9 100644 --- a/flow/.golangci.yml +++ b/flow/.golangci.yml @@ -21,7 +21,7 @@ linters: - misspell - musttag - nakedret - - nolintlint + # TODO bring back in 0.15 - nolintlint - nonamedreturns - perfsprint - prealloc 
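For reference, the name-based connector lookup that the following diffs adopt (peers resolved from the catalog by name via the generic GetByNameAs added in flow/connectors/core.go, instead of full peer configs travelling in Temporal state) can be exercised standalone roughly as sketched below. This is an illustrative sketch only, not code from this patch; the catalog DSN and the peer name "source_pg" are placeholders.

package main

import (
	"context"
	"fmt"
	"log"

	"github.com/jackc/pgx/v5/pgxpool"

	"github.com/PeerDB-io/peer-flow/connectors"
)

// resolveSourceConnector mirrors the lookup pattern used by the patched
// activities: the peer is loaded from the catalog by name and narrowed to the
// connector interface the caller needs.
func resolveSourceConnector(ctx context.Context, catalogPool *pgxpool.Pool, peerName string) (connectors.CDCPullConnector, error) {
	srcConn, err := connectors.GetByNameAs[connectors.CDCPullConnector](ctx, catalogPool, peerName)
	if err != nil {
		return nil, fmt.Errorf("failed to get source connector for %s: %w", peerName, err)
	}
	return srcConn, nil
}

func main() {
	ctx := context.Background()
	// Placeholder catalog connection string, for illustration only.
	pool, err := pgxpool.New(ctx, "postgres://localhost:5432/peerdb_catalog")
	if err != nil {
		log.Fatal(err)
	}
	defer pool.Close()

	conn, err := resolveSourceConnector(ctx, pool, "source_pg") // "source_pg" is a made-up peer name
	if err != nil {
		log.Fatal(err)
	}
	defer connectors.CloseConnector(ctx, conn)
	// ... use conn ...
}
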
diff --git a/flow/activities/flowable.go b/flow/activities/flowable.go index 9bfdba2e23..acab05d2d4 100644 --- a/flow/activities/flowable.go +++ b/flow/activities/flowable.go @@ -20,9 +20,7 @@ import ( "github.com/PeerDB-io/peer-flow/alerting" "github.com/PeerDB-io/peer-flow/connectors" - connbigquery "github.com/PeerDB-io/peer-flow/connectors/bigquery" connpostgres "github.com/PeerDB-io/peer-flow/connectors/postgres" - connsnowflake "github.com/PeerDB-io/peer-flow/connectors/snowflake" "github.com/PeerDB-io/peer-flow/connectors/utils" "github.com/PeerDB-io/peer-flow/connectors/utils/monitoring" "github.com/PeerDB-io/peer-flow/generated/protos" @@ -57,7 +55,7 @@ func (a *FlowableActivity) CheckConnection( config *protos.SetupInput, ) (*CheckConnectionResult, error) { ctx = context.WithValue(ctx, shared.FlowNameKey, config.FlowName) - dstConn, err := connectors.GetCDCSyncConnector(ctx, config.Peer) + dstConn, err := connectors.GetByNameAs[connectors.CDCSyncConnector](ctx, a.CatalogPool, config.PeerName) if err != nil { a.Alerter.LogFlowError(ctx, config.FlowName, err) return nil, fmt.Errorf("failed to get connector: %w", err) @@ -73,7 +71,7 @@ func (a *FlowableActivity) CheckConnection( func (a *FlowableActivity) SetupMetadataTables(ctx context.Context, config *protos.SetupInput) error { ctx = context.WithValue(ctx, shared.FlowNameKey, config.FlowName) - dstConn, err := connectors.GetCDCSyncConnector(ctx, config.Peer) + dstConn, err := connectors.GetByNameAs[connectors.CDCSyncConnector](ctx, a.CatalogPool, config.PeerName) if err != nil { return fmt.Errorf("failed to get connector: %w", err) } @@ -92,7 +90,7 @@ func (a *FlowableActivity) EnsurePullability( config *protos.EnsurePullabilityBatchInput, ) (*protos.EnsurePullabilityBatchOutput, error) { ctx = context.WithValue(ctx, shared.FlowNameKey, config.FlowJobName) - srcConn, err := connectors.GetCDCPullConnector(ctx, config.PeerConnectionConfig) + srcConn, err := connectors.GetByNameAs[connectors.CDCPullConnector](ctx, a.CatalogPool, config.PeerName) if err != nil { return nil, fmt.Errorf("failed to get connector: %w", err) } @@ -113,7 +111,7 @@ func (a *FlowableActivity) CreateRawTable( config *protos.CreateRawTableInput, ) (*protos.CreateRawTableOutput, error) { ctx = context.WithValue(ctx, shared.FlowNameKey, config.FlowJobName) - dstConn, err := connectors.GetCDCSyncConnector(ctx, config.PeerConnectionConfig) + dstConn, err := connectors.GetByNameAs[connectors.CDCSyncConnector](ctx, a.CatalogPool, config.PeerName) if err != nil { return nil, fmt.Errorf("failed to get connector: %w", err) } @@ -124,8 +122,7 @@ func (a *FlowableActivity) CreateRawTable( a.Alerter.LogFlowError(ctx, config.FlowJobName, err) return nil, err } - err = monitoring.InitializeCDCFlow(ctx, a.CatalogPool, config.FlowJobName) - if err != nil { + if err := monitoring.InitializeCDCFlow(ctx, a.CatalogPool, config.FlowJobName); err != nil { return nil, err } @@ -138,7 +135,7 @@ func (a *FlowableActivity) GetTableSchema( config *protos.GetTableSchemaBatchInput, ) (*protos.GetTableSchemaBatchOutput, error) { ctx = context.WithValue(ctx, shared.FlowNameKey, config.FlowName) - srcConn, err := connectors.GetAs[connectors.GetTableSchemaConnector](ctx, config.PeerConnectionConfig) + srcConn, err := connectors.GetByNameAs[connectors.GetTableSchemaConnector](ctx, a.CatalogPool, config.PeerName) if err != nil { return nil, fmt.Errorf("failed to get GetTableSchemaConnector: %w", err) } @@ -158,7 +155,7 @@ func (a *FlowableActivity) CreateNormalizedTable( ) 
(*protos.SetupNormalizedTableBatchOutput, error) { logger := activity.GetLogger(ctx) ctx = context.WithValue(ctx, shared.FlowNameKey, config.FlowName) - conn, err := connectors.GetAs[connectors.NormalizedTablesConnector](ctx, config.PeerConnectionConfig) + conn, err := connectors.GetByNameAs[connectors.NormalizedTablesConnector](ctx, a.CatalogPool, config.PeerName) if err != nil { if errors.Is(err, errors.ErrUnsupported) { logger.Info("Connector does not implement normalized tables") @@ -221,7 +218,7 @@ func (a *FlowableActivity) MaintainPull( config *protos.FlowConnectionConfigs, sessionID string, ) error { - srcConn, err := connectors.GetCDCPullConnector(ctx, config.Source) + srcConn, err := connectors.GetByNameAs[connectors.CDCPullConnector](ctx, a.CatalogPool, config.SourceName) if err != nil { return err } @@ -333,7 +330,7 @@ func (a *FlowableActivity) StartNormalize( ctx = context.WithValue(ctx, shared.FlowNameKey, conn.FlowJobName) logger := activity.GetLogger(ctx) - dstConn, err := connectors.GetCDCNormalizeConnector(ctx, conn.Destination) + dstConn, err := connectors.GetByNameAs[connectors.CDCNormalizeConnector](ctx, a.CatalogPool, conn.DestinationName) if errors.Is(err, errors.ErrUnsupported) { err = monitoring.UpdateEndTimeForCDCBatch(ctx, a.CatalogPool, input.FlowConnectionConfigs.FlowJobName, input.SyncBatchID) @@ -383,7 +380,7 @@ func (a *FlowableActivity) StartNormalize( // SetupQRepMetadataTables sets up the metadata tables for QReplication. func (a *FlowableActivity) SetupQRepMetadataTables(ctx context.Context, config *protos.QRepConfig) error { - conn, err := connectors.GetQRepSyncConnector(ctx, config.DestinationPeer) + conn, err := connectors.GetByNameAs[connectors.QRepSyncConnector](ctx, a.CatalogPool, config.DestinationName) if err != nil { return fmt.Errorf("failed to get connector: %w", err) } @@ -409,7 +406,7 @@ func (a *FlowableActivity) GetQRepPartitions(ctx context.Context, if err != nil { return nil, err } - srcConn, err := connectors.GetQRepPullConnector(ctx, config.SourcePeer) + srcConn, err := connectors.GetByNameAs[connectors.QRepPullConnector](ctx, a.CatalogPool, config.SourceName) if err != nil { return nil, fmt.Errorf("failed to get qrep pull connector: %w", err) } @@ -511,7 +508,7 @@ func (a *FlowableActivity) ConsolidateQRepPartitions(ctx context.Context, config runUUID string, ) error { ctx = context.WithValue(ctx, shared.FlowNameKey, config.FlowJobName) - dstConn, err := connectors.GetQRepConsolidateConnector(ctx, config.DestinationPeer) + dstConn, err := connectors.GetByNameAs[connectors.QRepConsolidateConnector](ctx, a.CatalogPool, config.DestinationName) if errors.Is(err, errors.ErrUnsupported) { return monitoring.UpdateEndTimeForQRepRun(ctx, a.CatalogPool, runUUID) } else if err != nil { @@ -535,7 +532,7 @@ func (a *FlowableActivity) ConsolidateQRepPartitions(ctx context.Context, config func (a *FlowableActivity) CleanupQRepFlow(ctx context.Context, config *protos.QRepConfig) error { ctx = context.WithValue(ctx, shared.FlowNameKey, config.FlowJobName) - dst, err := connectors.GetQRepConsolidateConnector(ctx, config.DestinationPeer) + dst, err := connectors.GetByNameAs[connectors.QRepConsolidateConnector](ctx, a.CatalogPool, config.DestinationName) if errors.Is(err, errors.ErrUnsupported) { return nil } else if err != nil { @@ -548,7 +545,7 @@ func (a *FlowableActivity) CleanupQRepFlow(ctx context.Context, config *protos.Q } func (a *FlowableActivity) DropFlowSource(ctx context.Context, config *protos.ShutdownRequest) error { - srcConn, err := 
connectors.GetCDCPullConnector(ctx, config.SourcePeer) + srcConn, err := connectors.GetByNameAs[connectors.CDCPullConnector](ctx, a.CatalogPool, config.SourcePeer) if err != nil { return fmt.Errorf("failed to get source connector: %w", err) } @@ -559,7 +556,7 @@ func (a *FlowableActivity) DropFlowSource(ctx context.Context, config *protos.Sh func (a *FlowableActivity) DropFlowDestination(ctx context.Context, config *protos.ShutdownRequest) error { ctx = context.WithValue(ctx, shared.FlowNameKey, config.FlowJobName) - dstConn, err := connectors.GetCDCSyncConnector(ctx, config.DestinationPeer) + dstConn, err := connectors.GetByNameAs[connectors.CDCSyncConnector](ctx, a.CatalogPool, config.DestinationPeer) if err != nil { return fmt.Errorf("failed to get destination connector: %w", err) } @@ -587,31 +584,30 @@ func (a *FlowableActivity) SendWALHeartbeat(ctx context.Context) error { pgPeers, err := a.getPostgresPeerConfigs(ctx) if err != nil { - logger.Warn("[sendwalheartbeat] unable to fetch peers. Skipping walheartbeat send.", slog.Any("error", err)) + logger.Warn("unable to fetch peers. Skipping walheartbeat send.", slog.Any("error", err)) return err } // run above command for each Postgres peer for _, pgPeer := range pgPeers { activity.RecordHeartbeat(ctx, pgPeer.Name) - if ctx.Err() != nil { - return nil + if err := ctx.Err(); err != nil { + return err } func() { pgConfig := pgPeer.GetPostgresConfig() pgConn, peerErr := connpostgres.NewPostgresConnector(ctx, pgConfig) if peerErr != nil { - logger.Error(fmt.Sprintf("error creating connector for postgres peer %v with host %v: %v", + logger.Error(fmt.Sprintf("error creating connector for postgres peer %s with host %s: %v", pgPeer.Name, pgConfig.Host, peerErr)) return } defer pgConn.Close() - cmdErr := pgConn.ExecuteCommand(ctx, walHeartbeatStatement) - if cmdErr != nil { - logger.Warn(fmt.Sprintf("could not send walheartbeat to peer %v: %v", pgPeer.Name, cmdErr)) + if cmdErr := pgConn.ExecuteCommand(ctx, walHeartbeatStatement); cmdErr != nil { + logger.Warn(fmt.Sprintf("could not send walheartbeat to peer %s: %v", pgPeer.Name, cmdErr)) } - logger.Info(fmt.Sprintf("sent walheartbeat to peer %v", pgPeer.Name)) + logger.Info("sent walheartbeat", slog.String("peer", pgPeer.Name)) }() } @@ -647,7 +643,7 @@ func (a *FlowableActivity) RecordSlotSizes(ctx context.Context) error { logger := activity.GetLogger(ctx) for _, config := range configs { func() { - srcConn, err := connectors.GetCDCPullConnector(ctx, config.Source) + srcConn, err := connectors.GetByNameAs[connectors.CDCPullConnector](ctx, a.CatalogPool, config.SourceName) if err != nil { if !errors.Is(err, errors.ErrUnsupported) { logger.Error("Failed to create connector to handle slot info", slog.Any("error", err)) @@ -660,7 +656,7 @@ func (a *FlowableActivity) RecordSlotSizes(ctx context.Context) error { if config.ReplicationSlotName != "" { slotName = config.ReplicationSlotName } - peerName := config.Source.Name + peerName := config.SourceName activity.RecordHeartbeat(ctx, fmt.Sprintf("checking %s on %s", slotName, peerName)) if ctx.Err() != nil { @@ -701,39 +697,29 @@ func (a *FlowableActivity) RecordSlotSizes(ctx context.Context) error { slotMetricGuages.OpenReplicationConnectionsGuage = openReplicationConnectionsGauge } - err = srcConn.HandleSlotInfo(ctx, a.Alerter, a.CatalogPool, slotName, peerName, slotMetricGuages) - if err != nil { + if err := srcConn.HandleSlotInfo(ctx, a.Alerter, a.CatalogPool, slotName, peerName, slotMetricGuages); err != nil { logger.Error("Failed to handle slot 
info", slog.Any("error", err)) } }() - if ctx.Err() != nil { - return nil - } } return nil } -type QRepWaitUntilNewRowsResult struct { - Found bool -} - func (a *FlowableActivity) QRepHasNewRows(ctx context.Context, config *protos.QRepConfig, last *protos.QRepPartition, -) (QRepWaitUntilNewRowsResult, error) { +) (bool, error) { ctx = context.WithValue(ctx, shared.FlowNameKey, config.FlowJobName) logger := log.With(activity.GetLogger(ctx), slog.String(string(shared.FlowNameKey), config.FlowJobName)) - if config.SourcePeer.Type != protos.DBType_POSTGRES { - return QRepWaitUntilNewRowsResult{Found: true}, nil - } - - logger.Info(fmt.Sprintf("current last partition value is %v", last)) - - srcConn, err := connectors.GetQRepPullConnector(ctx, config.SourcePeer) + // TODO implement for other QRepPullConnector sources + srcConn, err := connectors.GetByNameAs[*connpostgres.PostgresConnector](ctx, a.CatalogPool, config.SourceName) if err != nil { + if errors.Is(err, errors.ErrUnsupported) { + return true, nil + } a.Alerter.LogFlowError(ctx, config.FlowJobName, err) - return QRepWaitUntilNewRowsResult{Found: false}, fmt.Errorf("failed to get qrep source connector: %w", err) + return false, fmt.Errorf("failed to get qrep source connector: %w", err) } defer connectors.CloseConnector(ctx, srcConn) @@ -742,21 +728,21 @@ func (a *FlowableActivity) QRepHasNewRows(ctx context.Context, }) defer shutdown() - pgSrcConn := srcConn.(*connpostgres.PostgresConnector) - result, err := pgSrcConn.CheckForUpdatedMaxValue(ctx, config, last) + logger.Info(fmt.Sprintf("current last partition value is %v", last)) + + result, err := srcConn.CheckForUpdatedMaxValue(ctx, config, last) if err != nil { a.Alerter.LogFlowError(ctx, config.FlowJobName, err) - return QRepWaitUntilNewRowsResult{Found: false}, fmt.Errorf("failed to check for new rows: %w", err) + return false, fmt.Errorf("failed to check for new rows: %w", err) } - - return QRepWaitUntilNewRowsResult{Found: result}, nil + return result, nil } func (a *FlowableActivity) RenameTables(ctx context.Context, config *protos.RenameTablesInput) ( *protos.RenameTablesOutput, error, ) { ctx = context.WithValue(ctx, shared.FlowNameKey, config.FlowJobName) - conn, err := connectors.GetAs[connectors.RenameTablesConnector](ctx, config.Peer) + conn, err := connectors.GetByNameAs[connectors.RenameTablesConnector](ctx, a.CatalogPool, config.PeerName) if err != nil { a.Alerter.LogFlowError(ctx, config.FlowJobName, err) return nil, fmt.Errorf("failed to get connector: %w", err) @@ -775,26 +761,13 @@ func (a *FlowableActivity) CreateTablesFromExisting(ctx context.Context, req *pr *protos.CreateTablesFromExistingOutput, error, ) { ctx = context.WithValue(ctx, shared.FlowNameKey, req.FlowJobName) - dstConn, err := connectors.GetCDCSyncConnector(ctx, req.Peer) + dstConn, err := connectors.GetByNameAs[connectors.CreateTablesFromExistingConnector](ctx, a.CatalogPool, req.PeerName) if err != nil { return nil, fmt.Errorf("failed to get connector: %w", err) } defer connectors.CloseConnector(ctx, dstConn) - if req.Peer.Type == protos.DBType_SNOWFLAKE { - sfConn, ok := dstConn.(*connsnowflake.SnowflakeConnector) - if !ok { - return nil, errors.New("failed to cast connector to snowflake connector") - } - return sfConn.CreateTablesFromExisting(ctx, req) - } else if req.Peer.Type == protos.DBType_BIGQUERY { - bqConn, ok := dstConn.(*connbigquery.BigQueryConnector) - if !ok { - return nil, errors.New("failed to cast connector to bigquery connector") - } - return bqConn.CreateTablesFromExisting(ctx, 
req) - } - return nil, errors.New("create tables from existing is only supported on snowflake and bigquery") + return dstConn.CreateTablesFromExisting(ctx, req) } func (a *FlowableActivity) ReplicateXminPartition(ctx context.Context, @@ -824,7 +797,7 @@ func (a *FlowableActivity) AddTablesToPublication(ctx context.Context, cfg *prot additionalTableMappings []*protos.TableMapping, ) error { ctx = context.WithValue(ctx, shared.FlowNameKey, cfg.FlowJobName) - srcConn, err := connectors.GetCDCPullConnector(ctx, cfg.Source) + srcConn, err := connectors.GetByNameAs[connectors.CDCPullConnector](ctx, a.CatalogPool, cfg.SourceName) if err != nil { return fmt.Errorf("failed to get source connector: %w", err) } @@ -841,94 +814,38 @@ func (a *FlowableActivity) AddTablesToPublication(ctx context.Context, cfg *prot return err } -func (a *FlowableActivity) LoadPeer(ctx context.Context, peerName string) (*protos.Peer, error) { - row := a.CatalogPool.QueryRow(ctx, ` - SELECT name, type, options - FROM peers - WHERE name = $1`, peerName) +// TODO remove in 0.15 +func (a *FlowableActivity) UpdateCdcFlowConfigInCatalog( + ctx context.Context, + cfg *protos.FlowConnectionConfigs, +) error { + cfgBytes, err := proto.Marshal(cfg) + if err != nil { + return fmt.Errorf("unable to marshal flow config: %w", err) + } + + _, err = a.CatalogPool.Exec(ctx, "UPDATE flows SET config_proto = $1 WHERE name = $2", cfgBytes, cfg.FlowJobName) + if err != nil { + return fmt.Errorf("unable to update flow config in catalog: %w", err) + } + + return nil +} - var peer protos.Peer - var peerOptions []byte - if err := row.Scan(&peer.Name, &peer.Type, &peerOptions); err != nil { - return nil, fmt.Errorf("failed to load peer: %w", err) +// TODO remove in 0.15 +func (a *FlowableActivity) UpdateQRepFlowConfigInCatalog( + ctx context.Context, + cfg *protos.FlowConnectionConfigs, +) error { + cfgBytes, err := proto.Marshal(cfg) + if err != nil { + return fmt.Errorf("unable to marshal flow config: %w", err) } - switch peer.Type { - case protos.DBType_BIGQUERY: - var config protos.BigqueryConfig - if err := proto.Unmarshal(peerOptions, &config); err != nil { - return nil, fmt.Errorf("failed to unmarshal BigQuery config: %w", err) - } - peer.Config = &protos.Peer_BigqueryConfig{BigqueryConfig: &config} - case protos.DBType_SNOWFLAKE: - var config protos.SnowflakeConfig - if err := proto.Unmarshal(peerOptions, &config); err != nil { - return nil, fmt.Errorf("failed to unmarshal Snowflake config: %w", err) - } - peer.Config = &protos.Peer_SnowflakeConfig{SnowflakeConfig: &config} - case protos.DBType_MONGO: - var config protos.MongoConfig - if err := proto.Unmarshal(peerOptions, &config); err != nil { - return nil, fmt.Errorf("failed to unmarshal MongoDB config: %w", err) - } - peer.Config = &protos.Peer_MongoConfig{MongoConfig: &config} - case protos.DBType_POSTGRES: - var config protos.PostgresConfig - if err := proto.Unmarshal(peerOptions, &config); err != nil { - return nil, fmt.Errorf("failed to unmarshal Postgres config: %w", err) - } - peer.Config = &protos.Peer_PostgresConfig{PostgresConfig: &config} - case protos.DBType_S3: - var config protos.S3Config - if err := proto.Unmarshal(peerOptions, &config); err != nil { - return nil, fmt.Errorf("failed to unmarshal S3 config: %w", err) - } - peer.Config = &protos.Peer_S3Config{S3Config: &config} - case protos.DBType_SQLSERVER: - var config protos.SqlServerConfig - if err := proto.Unmarshal(peerOptions, &config); err != nil { - return nil, fmt.Errorf("failed to unmarshal SQL Server config: 
%w", err) - } - peer.Config = &protos.Peer_SqlserverConfig{SqlserverConfig: &config} - case protos.DBType_MYSQL: - var config protos.MySqlConfig - if err := proto.Unmarshal(peerOptions, &config); err != nil { - return nil, fmt.Errorf("failed to unmarshal MySQL config: %w", err) - } - peer.Config = &protos.Peer_MysqlConfig{MysqlConfig: &config} - case protos.DBType_CLICKHOUSE: - var config protos.ClickhouseConfig - if err := proto.Unmarshal(peerOptions, &config); err != nil { - return nil, fmt.Errorf("failed to unmarshal ClickHouse config: %w", err) - } - peer.Config = &protos.Peer_ClickhouseConfig{ClickhouseConfig: &config} - case protos.DBType_KAFKA: - var config protos.KafkaConfig - if err := proto.Unmarshal(peerOptions, &config); err != nil { - return nil, fmt.Errorf("failed to unmarshal Kafka config: %w", err) - } - peer.Config = &protos.Peer_KafkaConfig{KafkaConfig: &config} - case protos.DBType_PUBSUB: - var config protos.PubSubConfig - if err := proto.Unmarshal(peerOptions, &config); err != nil { - return nil, fmt.Errorf("failed to unmarshal Pub/Sub config: %w", err) - } - peer.Config = &protos.Peer_PubsubConfig{PubsubConfig: &config} - case protos.DBType_EVENTHUBS: - var config protos.EventHubGroupConfig - if err := proto.Unmarshal(peerOptions, &config); err != nil { - return nil, fmt.Errorf("failed to unmarshal Event Hubs config: %w", err) - } - peer.Config = &protos.Peer_EventhubGroupConfig{EventhubGroupConfig: &config} - case protos.DBType_ELASTICSEARCH: - var config protos.ElasticsearchConfig - if err := proto.Unmarshal(peerOptions, &config); err != nil { - return nil, fmt.Errorf("failed to unmarshal Elasticsearch config: %w", err) - } - peer.Config = &protos.Peer_ElasticsearchConfig{ElasticsearchConfig: &config} - default: - return nil, fmt.Errorf("unsupported peer type: %s", peer.Type) + _, err = a.CatalogPool.Exec(ctx, "UPDATE flows SET config_proto = $1 WHERE name = $2", cfgBytes, cfg.FlowJobName) + if err != nil { + return fmt.Errorf("unable to update flow config in catalog: %w", err) } - return &peer, nil + return nil } diff --git a/flow/activities/flowable_core.go b/flow/activities/flowable_core.go index 4d9d0c9611..7ca752b8a2 100644 --- a/flow/activities/flowable_core.go +++ b/flow/activities/flowable_core.go @@ -85,7 +85,7 @@ func syncCore[TPull connectors.CDCPullConnectorCore, TSync connectors.CDCSyncCon }) defer shutdown() - dstConn, err := connectors.GetAs[TSync](ctx, config.Destination) + dstConn, err := connectors.GetByNameAs[TSync](ctx, a.CatalogPool, config.DestinationName) if err != nil { return nil, fmt.Errorf("failed to get destination connector: %w", err) } @@ -154,7 +154,7 @@ func syncCore[TPull connectors.CDCPullConnectorCore, TSync connectors.CDCSyncCon hasRecords := !recordBatchSync.WaitAndCheckEmpty() logger.Info("current sync flow has records?", slog.Bool("hasRecords", hasRecords)) - dstConn, err = connectors.GetAs[TSync](ctx, config.Destination) + dstConn, err = connectors.GetByNameAs[TSync](ctx, a.CatalogPool, config.DestinationName) if err != nil { return nil, fmt.Errorf("failed to recreate destination connector: %w", err) } @@ -189,7 +189,7 @@ func syncCore[TPull connectors.CDCPullConnectorCore, TSync connectors.CDCSyncCon var res *model.SyncResponse errGroup.Go(func() error { syncBatchID, err := dstConn.GetLastSyncBatchID(errCtx, flowName) - if err != nil && config.Destination.Type != protos.DBType_EVENTHUBS { + if err != nil { return err } syncBatchID += 1 @@ -332,14 +332,14 @@ func replicateQRepPartition[TRead any, TWrite any, TSync 
connectors.QRepSyncConn ctx = context.WithValue(ctx, shared.FlowNameKey, config.FlowJobName) logger := log.With(activity.GetLogger(ctx), slog.String(string(shared.FlowNameKey), config.FlowJobName)) - srcConn, err := connectors.GetAs[TPull](ctx, config.SourcePeer) + srcConn, err := connectors.GetByNameAs[TPull](ctx, a.CatalogPool, config.SourceName) if err != nil { a.Alerter.LogFlowError(ctx, config.FlowJobName, err) return fmt.Errorf("failed to get qrep source connector: %w", err) } defer connectors.CloseConnector(ctx, srcConn) - dstConn, err := connectors.GetAs[TSync](ctx, config.DestinationPeer) + dstConn, err := connectors.GetByNameAs[TSync](ctx, a.CatalogPool, config.DestinationName) if err != nil { a.Alerter.LogFlowError(ctx, config.FlowJobName, err) return fmt.Errorf("failed to get qrep destination connector: %w", err) @@ -431,13 +431,13 @@ func replicateXminPartition[TRead any, TWrite any, TSync connectors.QRepSyncConn logger := activity.GetLogger(ctx) startTime := time.Now() - srcConn, err := connectors.GetAs[*connpostgres.PostgresConnector](ctx, config.SourcePeer) + srcConn, err := connectors.GetByNameAs[*connpostgres.PostgresConnector](ctx, a.CatalogPool, config.SourceName) if err != nil { return 0, fmt.Errorf("failed to get qrep source connector: %w", err) } defer connectors.CloseConnector(ctx, srcConn) - dstConn, err := connectors.GetAs[TSync](ctx, config.DestinationPeer) + dstConn, err := connectors.GetByNameAs[TSync](ctx, a.CatalogPool, config.DestinationName) if err != nil { return 0, fmt.Errorf("failed to get qrep destination connector: %w", err) } diff --git a/flow/activities/snapshot_activity.go b/flow/activities/snapshot_activity.go index 6faea50ee5..03b224f787 100644 --- a/flow/activities/snapshot_activity.go +++ b/flow/activities/snapshot_activity.go @@ -2,11 +2,13 @@ package activities import ( "context" + "errors" "fmt" "log/slog" "sync" "time" + "github.com/jackc/pgx/v5/pgxpool" "go.temporal.io/sdk/activity" "github.com/PeerDB-io/peer-flow/alerting" @@ -29,6 +31,7 @@ type TxSnapshotState struct { type SnapshotActivity struct { Alerter *alerting.Alerter + CatalogPool *pgxpool.Pool SlotSnapshotStates map[string]SlotSnapshotState TxSnapshotStates map[string]TxSnapshotState SnapshotStatesMutex sync.Mutex @@ -56,16 +59,14 @@ func (a *SnapshotActivity) SetupReplication( ctx = context.WithValue(ctx, shared.FlowNameKey, config.FlowJobName) logger := activity.GetLogger(ctx) - dbType := config.PeerConnectionConfig.Type - if dbType != protos.DBType_POSTGRES { - logger.Info(fmt.Sprintf("setup replication is no-op for %s", dbType)) - return nil, nil - } - a.Alerter.LogFlowEvent(ctx, config.FlowJobName, "Started Snapshot Flow Job") - conn, err := connectors.GetCDCPullConnector(ctx, config.PeerConnectionConfig) + conn, err := connectors.GetByNameAs[*connpostgres.PostgresConnector](ctx, a.CatalogPool, config.PeerName) if err != nil { + if errors.Is(err, errors.ErrUnsupported) { + logger.Info("setup replication is no-op for non-postgres source") + return nil, nil + } return nil, fmt.Errorf("failed to get connector: %w", err) } @@ -80,14 +81,10 @@ func (a *SnapshotActivity) SetupReplication( connectors.CloseConnector(ctx, conn) } - // This now happens in a goroutine go func() { - pgConn := conn.(*connpostgres.PostgresConnector) - err = pgConn.SetupReplication(ctx, slotSignal, config) - if err != nil { + if err := conn.SetupReplication(ctx, slotSignal, config); err != nil { closeConnectionForError(err) replicationErr <- err - return } }() @@ -122,8 +119,8 @@ func (a *SnapshotActivity) 
SetupReplication( }, nil } -func (a *SnapshotActivity) MaintainTx(ctx context.Context, sessionID string, peer *protos.Peer) error { - conn, err := connectors.GetCDCPullConnector(ctx, peer) +func (a *SnapshotActivity) MaintainTx(ctx context.Context, sessionID string, peer string) error { + conn, err := connectors.GetByNameAs[connectors.CDCPullConnector](ctx, a.CatalogPool, peer) if err != nil { return err } diff --git a/flow/cmd/handler.go b/flow/cmd/handler.go index d038f41cf1..ba445d3602 100644 --- a/flow/cmd/handler.go +++ b/flow/cmd/handler.go @@ -14,6 +14,7 @@ import ( "google.golang.org/protobuf/proto" "github.com/PeerDB-io/peer-flow/alerting" + "github.com/PeerDB-io/peer-flow/connectors" "github.com/PeerDB-io/peer-flow/connectors/utils" "github.com/PeerDB-io/peer-flow/generated/protos" "github.com/PeerDB-io/peer-flow/model" @@ -60,16 +61,16 @@ func schemaForTableIdentifier(tableIdentifier string, peerDBType int32) string { func (h *FlowRequestHandler) createCdcJobEntry(ctx context.Context, req *protos.CreateCDCFlowRequest, workflowID string, ) error { - sourcePeerID, sourePeerType, srcErr := h.getPeerID(ctx, req.ConnectionConfigs.Source.Name) + sourcePeerID, sourePeerType, srcErr := h.getPeerID(ctx, req.ConnectionConfigs.SourceName) if srcErr != nil { return fmt.Errorf("unable to get peer id for source peer %s: %w", - req.ConnectionConfigs.Source.Name, srcErr) + req.ConnectionConfigs.SourceName, srcErr) } - destinationPeerID, destinationPeerType, dstErr := h.getPeerID(ctx, req.ConnectionConfigs.Destination.Name) + destinationPeerID, destinationPeerType, dstErr := h.getPeerID(ctx, req.ConnectionConfigs.DestinationName) if dstErr != nil { return fmt.Errorf("unable to get peer id for target peer %s: %w", - req.ConnectionConfigs.Destination.Name, srcErr) + req.ConnectionConfigs.DestinationName, srcErr) } for _, v := range req.ConnectionConfigs.TableMappings { @@ -92,14 +93,14 @@ func (h *FlowRequestHandler) createCdcJobEntry(ctx context.Context, func (h *FlowRequestHandler) createQRepJobEntry(ctx context.Context, req *protos.CreateQRepFlowRequest, workflowID string, ) error { - sourcePeerName := req.QrepConfig.SourcePeer.Name + sourcePeerName := req.QrepConfig.SourceName sourcePeerID, _, srcErr := h.getPeerID(ctx, sourcePeerName) if srcErr != nil { return fmt.Errorf("unable to get peer id for source peer %s: %w", sourcePeerName, srcErr) } - destinationPeerName := req.QrepConfig.DestinationPeer.Name + destinationPeerName := req.QrepConfig.DestinationName destinationPeerID, _, dstErr := h.getPeerID(ctx, destinationPeerName) if dstErr != nil { return fmt.Errorf("unable to get peer id for target peer %s: %w", @@ -167,10 +168,7 @@ func (h *FlowRequestHandler) updateFlowConfigInCatalog( ctx context.Context, cfg *protos.FlowConnectionConfigs, ) error { - var cfgBytes []byte - var err error - - cfgBytes, err = proto.Marshal(cfg) + cfgBytes, err := proto.Marshal(cfg) if err != nil { return fmt.Errorf("unable to marshal flow config: %w", err) } @@ -208,15 +206,18 @@ func (h *FlowRequestHandler) CreateQRepFlow( }, } if req.CreateCatalogEntry { - err := h.createQRepJobEntry(ctx, req, workflowID) - if err != nil { + if err := h.createQRepJobEntry(ctx, req, workflowID); err != nil { slog.Error("unable to create flow job entry", slog.Any("error", err), slog.String("flowName", cfg.FlowJobName)) return nil, fmt.Errorf("unable to create flow job entry: %w", err) } } + dbtype, err := connectors.LoadPeerType(ctx, h.pool, cfg.SourceName) + if err != nil { + return nil, err + } var workflowFn interface{} - if 
cfg.SourcePeer.Type == protos.DBType_POSTGRES && cfg.WatermarkColumn == "xmin" { + if dbtype == protos.DBType_POSTGRES && cfg.WatermarkColumn == "xmin" { workflowFn = peerflow.XminFlowWorkflow } else { workflowFn = peerflow.QRepFlowWorkflow @@ -226,8 +227,7 @@ func (h *FlowRequestHandler) CreateQRepFlow( cfg.SyncedAtColName = "_PEERDB_SYNCED_AT" } - _, err := h.temporalClient.ExecuteWorkflow(ctx, workflowOptions, workflowFn, cfg, nil) - if err != nil { + if _, err := h.temporalClient.ExecuteWorkflow(ctx, workflowOptions, workflowFn, cfg, nil); err != nil { slog.Error("unable to start QRepFlow workflow", slog.Any("error", err), slog.String("flowName", cfg.FlowJobName)) return nil, fmt.Errorf("unable to start QRepFlow workflow: %w", err) diff --git a/flow/cmd/mirror_status.go b/flow/cmd/mirror_status.go index 5f59574889..2a5d53f2e8 100644 --- a/flow/cmd/mirror_status.go +++ b/flow/cmd/mirror_status.go @@ -1,3 +1,4 @@ +//nolint:staticcheck // TODO remove in 0.15 package cmd import ( @@ -10,6 +11,7 @@ import ( "google.golang.org/protobuf/proto" "google.golang.org/protobuf/types/known/timestamppb" + "github.com/PeerDB-io/peer-flow/connectors" "github.com/PeerDB-io/peer-flow/generated/protos" "github.com/PeerDB-io/peer-flow/shared" peerflow "github.com/PeerDB-io/peer-flow/workflows" @@ -120,6 +122,17 @@ func (h *FlowRequestHandler) CDCFlowStatus( return nil, err } + // TODO remove in 0.15 + // patching config to use new fields on ui + if config.Source != nil { + config.SourceName = config.Source.Name + config.Source = nil + } + if config.Destination != nil { + config.DestinationName = config.Destination.Name + config.Destination = nil + } + // patching config to show latest values from state if state.SyncFlowOptions != nil { config.IdleTimeoutSeconds = state.SyncFlowOptions.IdleTimeoutSeconds @@ -127,20 +140,27 @@ func (h *FlowRequestHandler) CDCFlowStatus( config.TableMappings = state.SyncFlowOptions.TableMappings } - var initialCopyStatus *protos.SnapshotStatus - - cloneStatuses, err := h.cloneTableSummary(ctx, req.FlowJobName) + srcType, err := connectors.LoadPeerType(ctx, h.pool, config.SourceName) + if err != nil { + return nil, err + } + dstType, err := connectors.LoadPeerType(ctx, h.pool, config.DestinationName) if err != nil { return nil, err } - initialCopyStatus = &protos.SnapshotStatus{ - Clones: cloneStatuses, + cloneStatuses, err := h.cloneTableSummary(ctx, req.FlowJobName) + if err != nil { + return nil, err } return &protos.CDCMirrorStatus{ - Config: config, - SnapshotStatus: initialCopyStatus, + Config: config, + SourceType: srcType, + DestinationType: dstType, + SnapshotStatus: &protos.SnapshotStatus{ + Clones: cloneStatuses, + }, }, nil } @@ -316,16 +336,14 @@ func (h *FlowRequestHandler) getFlowConfigFromCatalog( flowJobName string, ) (*protos.FlowConnectionConfigs, error) { var configBytes sql.RawBytes - var err error - var config protos.FlowConnectionConfigs - - err = h.pool.QueryRow(ctx, + err := h.pool.QueryRow(ctx, "SELECT config_proto FROM flows WHERE name = $1", flowJobName).Scan(&configBytes) if err != nil { slog.Error("unable to query flow config from catalog", slog.Any("error", err)) return nil, fmt.Errorf("unable to query flow config from catalog: %w", err) } + var config protos.FlowConnectionConfigs err = proto.Unmarshal(configBytes, &config) if err != nil { slog.Error("unable to unmarshal flow config", slog.Any("error", err)) @@ -386,8 +404,7 @@ func (h *FlowRequestHandler) getCDCWorkflowState(ctx context.Context, fmt.Errorf("failed to get state in workflow with 
ID %s: %w", workflowID, err) } var state peerflow.CDCFlowWorkflowState - err = res.Get(&state) - if err != nil { + if err := res.Get(&state); err != nil { slog.Error(fmt.Sprintf("failed to get state in workflow with ID %s: %s", workflowID, err.Error())) return nil, fmt.Errorf("failed to get state in workflow with ID %s: %w", workflowID, err) diff --git a/flow/cmd/snapshot_worker.go b/flow/cmd/snapshot_worker.go index ee49dbc039..c62c7f8d42 100644 --- a/flow/cmd/snapshot_worker.go +++ b/flow/cmd/snapshot_worker.go @@ -71,6 +71,7 @@ func SnapshotWorkerMain(opts *SnapshotWorkerOptions) (client.Client, worker.Work SlotSnapshotStates: make(map[string]activities.SlotSnapshotState), TxSnapshotStates: make(map[string]activities.TxSnapshotState), Alerter: alerting.NewAlerter(context.Background(), conn), + CatalogPool: conn, }) return c, w, nil diff --git a/flow/cmd/validate_mirror.go b/flow/cmd/validate_mirror.go index f67a4f1d40..2736186463 100644 --- a/flow/cmd/validate_mirror.go +++ b/flow/cmd/validate_mirror.go @@ -8,6 +8,7 @@ import ( "github.com/jackc/pgx/v5/pgtype" + "github.com/PeerDB-io/peer-flow/connectors" connpostgres "github.com/PeerDB-io/peer-flow/connectors/postgres" "github.com/PeerDB-io/peer-flow/connectors/utils" "github.com/PeerDB-io/peer-flow/generated/protos" @@ -43,12 +44,19 @@ func (h *FlowRequestHandler) ValidateCDCMirror( Ok: false, }, errors.New("connection configs is nil") } - sourcePeerConfig := req.ConnectionConfigs.Source.GetPostgresConfig() + sourcePeer, err := connectors.LoadPeer(ctx, h.pool, req.ConnectionConfigs.SourceName) + if err != nil { + return &protos.ValidateCDCMirrorResponse{ + Ok: false, + }, err + } + + sourcePeerConfig := sourcePeer.GetPostgresConfig() if sourcePeerConfig == nil { - slog.Error("/validatecdc source peer config is nil", slog.Any("peer", req.ConnectionConfigs.Source)) + slog.Error("/validatecdc source peer config is not postgres", slog.String("peer", req.ConnectionConfigs.SourceName)) return &protos.ValidateCDCMirrorResponse{ Ok: false, - }, errors.New("source peer config is nil") + }, errors.New("source peer config is not postgres") } pgPeer, err := connpostgres.NewPostgresConnector(ctx, sourcePeerConfig) diff --git a/flow/cmd/validate_peer.go b/flow/cmd/validate_peer.go index bae2a1f8b6..1ec957b862 100644 --- a/flow/cmd/validate_peer.go +++ b/flow/cmd/validate_peer.go @@ -39,7 +39,6 @@ func (h *FlowRequestHandler) ValidatePeer( Message: displayErr, }, nil } - defer conn.Close() if req.Peer.Type == protos.DBType_POSTGRES { @@ -58,10 +57,8 @@ func (h *FlowRequestHandler) ValidatePeer( } } - validationConn, ok := conn.(connectors.ValidationConnector) - if ok { - validErr := validationConn.ValidateCheck(ctx) - if validErr != nil { + if validationConn, ok := conn.(connectors.ValidationConnector); ok { + if validErr := validationConn.ValidateCheck(ctx); validErr != nil { displayErr := fmt.Sprintf("failed to validate peer %s: %v", req.Peer.Name, validErr) h.alerter.LogNonFlowWarning(ctx, telemetry.CreatePeer, req.Peer.Name, displayErr, @@ -73,8 +70,7 @@ func (h *FlowRequestHandler) ValidatePeer( } } - connErr := conn.ConnectionActive(ctx) - if connErr != nil { + if connErr := conn.ConnectionActive(ctx); connErr != nil { displayErr := fmt.Sprintf("failed to establish active connection to %s peer %s: %v", req.Peer.Type, req.Peer.Name, connErr) h.alerter.LogNonFlowWarning(ctx, telemetry.CreatePeer, req.Peer.Name, displayErr, diff --git a/flow/connectors/core.go b/flow/connectors/core.go index 3f1580216d..3a69765e41 100644 --- 
a/flow/connectors/core.go +++ b/flow/connectors/core.go @@ -3,9 +3,11 @@ package connectors import ( "context" "errors" + "fmt" "log/slog" "github.com/jackc/pgx/v5/pgxpool" + "google.golang.org/protobuf/proto" "github.com/PeerDB-io/peer-flow/alerting" connbigquery "github.com/PeerDB-io/peer-flow/connectors/bigquery" @@ -182,6 +184,12 @@ type CDCNormalizeConnector interface { NormalizeRecords(ctx context.Context, req *model.NormalizeRecordsRequest) (*model.NormalizeResponse, error) } +type CreateTablesFromExistingConnector interface { + Connector + + CreateTablesFromExisting(context.Context, *protos.CreateTablesFromExistingInput) (*protos.CreateTablesFromExistingOutput, error) +} + type QRepPullConnectorCore interface { Connector @@ -247,6 +255,105 @@ type RenameTablesConnector interface { RenameTables(context.Context, *protos.RenameTablesInput) (*protos.RenameTablesOutput, error) } +func LoadPeerType(ctx context.Context, catalogPool *pgxpool.Pool, peerName string) (protos.DBType, error) { + row := catalogPool.QueryRow(ctx, "SELECT type FROM peers WHERE name = $1", peerName) + var dbtype protos.DBType + err := row.Scan(&dbtype) + return dbtype, err +} + +func LoadPeer(ctx context.Context, catalogPool *pgxpool.Pool, peerName string) (*protos.Peer, error) { + row := catalogPool.QueryRow(ctx, ` + SELECT type, options + FROM peers + WHERE name = $1`, peerName) + + peer := &protos.Peer{Name: peerName} + var peerOptions []byte + if err := row.Scan(&peer.Type, &peerOptions); err != nil { + return nil, fmt.Errorf("failed to load peer: %w", err) + } + + switch peer.Type { + case protos.DBType_BIGQUERY: + var config protos.BigqueryConfig + if err := proto.Unmarshal(peerOptions, &config); err != nil { + return nil, fmt.Errorf("failed to unmarshal BigQuery config: %w", err) + } + peer.Config = &protos.Peer_BigqueryConfig{BigqueryConfig: &config} + case protos.DBType_SNOWFLAKE: + var config protos.SnowflakeConfig + if err := proto.Unmarshal(peerOptions, &config); err != nil { + return nil, fmt.Errorf("failed to unmarshal Snowflake config: %w", err) + } + peer.Config = &protos.Peer_SnowflakeConfig{SnowflakeConfig: &config} + case protos.DBType_MONGO: + var config protos.MongoConfig + if err := proto.Unmarshal(peerOptions, &config); err != nil { + return nil, fmt.Errorf("failed to unmarshal MongoDB config: %w", err) + } + peer.Config = &protos.Peer_MongoConfig{MongoConfig: &config} + case protos.DBType_POSTGRES: + var config protos.PostgresConfig + if err := proto.Unmarshal(peerOptions, &config); err != nil { + return nil, fmt.Errorf("failed to unmarshal Postgres config: %w", err) + } + peer.Config = &protos.Peer_PostgresConfig{PostgresConfig: &config} + case protos.DBType_S3: + var config protos.S3Config + if err := proto.Unmarshal(peerOptions, &config); err != nil { + return nil, fmt.Errorf("failed to unmarshal S3 config: %w", err) + } + peer.Config = &protos.Peer_S3Config{S3Config: &config} + case protos.DBType_SQLSERVER: + var config protos.SqlServerConfig + if err := proto.Unmarshal(peerOptions, &config); err != nil { + return nil, fmt.Errorf("failed to unmarshal SQL Server config: %w", err) + } + peer.Config = &protos.Peer_SqlserverConfig{SqlserverConfig: &config} + case protos.DBType_MYSQL: + var config protos.MySqlConfig + if err := proto.Unmarshal(peerOptions, &config); err != nil { + return nil, fmt.Errorf("failed to unmarshal MySQL config: %w", err) + } + peer.Config = &protos.Peer_MysqlConfig{MysqlConfig: &config} + case protos.DBType_CLICKHOUSE: + var config protos.ClickhouseConfig + if err 
:= proto.Unmarshal(peerOptions, &config); err != nil { + return nil, fmt.Errorf("failed to unmarshal ClickHouse config: %w", err) + } + peer.Config = &protos.Peer_ClickhouseConfig{ClickhouseConfig: &config} + case protos.DBType_KAFKA: + var config protos.KafkaConfig + if err := proto.Unmarshal(peerOptions, &config); err != nil { + return nil, fmt.Errorf("failed to unmarshal Kafka config: %w", err) + } + peer.Config = &protos.Peer_KafkaConfig{KafkaConfig: &config} + case protos.DBType_PUBSUB: + var config protos.PubSubConfig + if err := proto.Unmarshal(peerOptions, &config); err != nil { + return nil, fmt.Errorf("failed to unmarshal Pub/Sub config: %w", err) + } + peer.Config = &protos.Peer_PubsubConfig{PubsubConfig: &config} + case protos.DBType_EVENTHUBS: + var config protos.EventHubGroupConfig + if err := proto.Unmarshal(peerOptions, &config); err != nil { + return nil, fmt.Errorf("failed to unmarshal Event Hubs config: %w", err) + } + peer.Config = &protos.Peer_EventhubGroupConfig{EventhubGroupConfig: &config} + case protos.DBType_ELASTICSEARCH: + var config protos.ElasticsearchConfig + if err := proto.Unmarshal(peerOptions, &config); err != nil { + return nil, fmt.Errorf("failed to unmarshal Elasticsearch config: %w", err) + } + peer.Config = &protos.Peer_ElasticsearchConfig{ElasticsearchConfig: &config} + default: + return nil, fmt.Errorf("unsupported peer type: %s", peer.Type) + } + + return peer, nil +} + func GetConnector(ctx context.Context, config *protos.Peer) (Connector, error) { switch inner := config.Config.(type) { case *protos.Peer_PostgresConfig: @@ -290,28 +397,13 @@ func GetAs[T Connector](ctx context.Context, config *protos.Peer) (T, error) { } } -func GetCDCPullConnector(ctx context.Context, config *protos.Peer) (CDCPullConnector, error) { - return GetAs[CDCPullConnector](ctx, config) -} - -func GetCDCSyncConnector(ctx context.Context, config *protos.Peer) (CDCSyncConnector, error) { - return GetAs[CDCSyncConnector](ctx, config) -} - -func GetCDCNormalizeConnector(ctx context.Context, config *protos.Peer) (CDCNormalizeConnector, error) { - return GetAs[CDCNormalizeConnector](ctx, config) -} - -func GetQRepPullConnector(ctx context.Context, config *protos.Peer) (QRepPullConnector, error) { - return GetAs[QRepPullConnector](ctx, config) -} - -func GetQRepSyncConnector(ctx context.Context, config *protos.Peer) (QRepSyncConnector, error) { - return GetAs[QRepSyncConnector](ctx, config) -} - -func GetQRepConsolidateConnector(ctx context.Context, config *protos.Peer) (QRepConsolidateConnector, error) { - return GetAs[QRepConsolidateConnector](ctx, config) +func GetByNameAs[T Connector](ctx context.Context, catalogPool *pgxpool.Pool, name string) (T, error) { + peer, err := LoadPeer(ctx, catalogPool, name) + if err != nil { + var none T + return none, err + } + return GetAs[T](ctx, peer) } func CloseConnector(ctx context.Context, conn Connector) { @@ -352,6 +444,9 @@ var ( _ NormalizedTablesConnector = &connsnowflake.SnowflakeConnector{} _ NormalizedTablesConnector = &connclickhouse.ClickhouseConnector{} + _ CreateTablesFromExistingConnector = &connbigquery.BigQueryConnector{} + _ CreateTablesFromExistingConnector = &connsnowflake.SnowflakeConnector{} + _ QRepPullConnector = &connpostgres.PostgresConnector{} _ QRepPullConnector = &connsqlserver.SQLServerConnector{} diff --git a/flow/connectors/postgres/qrep.go b/flow/connectors/postgres/qrep.go index 843761c7e6..7b15c3a595 100644 --- a/flow/connectors/postgres/qrep.go +++ b/flow/connectors/postgres/qrep.go @@ -61,15 +61,14 @@ 
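LoadPeer and the generic GetByNameAs above let callers obtain a connector from just a catalog peer name, replacing the removed per-interface Get*Connector wrappers. A hedged usage sketch (peer name is illustrative; error handling abbreviated):

    // Hedged sketch: look up a peer by name in the catalog and open it as a
    // specific connector interface in one call.
    conn, err := connectors.GetByNameAs[connectors.CDCSyncConnector](ctx, catalogPool, "my_destination_peer")
    if err != nil {
        return fmt.Errorf("failed to get CDC sync connector: %w", err)
    }
    defer connectors.CloseConnector(ctx, conn)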
func (c *PostgresConnector) GetQRepPartitions( } defer shared.RollbackTx(getPartitionsTx, c.logger) - if err := c.setTransactionSnapshot(ctx, getPartitionsTx); err != nil { + if err := c.setTransactionSnapshot(ctx, getPartitionsTx, config.SnapshotName); err != nil { return nil, fmt.Errorf("failed to set transaction snapshot: %w", err) } return c.getNumRowsPartitions(ctx, getPartitionsTx, config, last) } -func (c *PostgresConnector) setTransactionSnapshot(ctx context.Context, tx pgx.Tx) error { - snapshot := c.config.TransactionSnapshot +func (c *PostgresConnector) setTransactionSnapshot(ctx context.Context, tx pgx.Tx, snapshot string) error { if snapshot != "" { if _, err := tx.Exec(ctx, "SET TRANSACTION SNAPSHOT "+QuoteLiteral(snapshot)); err != nil { return fmt.Errorf("failed to set transaction snapshot: %w", err) @@ -341,9 +340,7 @@ func corePullQRepRecords( partitionIdLog := slog.String(string(shared.PartitionIDKey), partition.PartitionId) if partition.FullTablePartition { c.logger.Info("pulling full table partition", partitionIdLog) - executor := c.NewQRepQueryExecutorSnapshot(c.config.TransactionSnapshot, - config.FlowJobName, partition.PartitionId) - + executor := c.NewQRepQueryExecutorSnapshot(config.SnapshotName, config.FlowJobName, partition.PartitionId) _, err := executor.ExecuteQueryIntoSink(ctx, sink, config.Query) return 0, err } @@ -382,8 +379,7 @@ func corePullQRepRecords( return 0, err } - executor := c.NewQRepQueryExecutorSnapshot(c.config.TransactionSnapshot, - config.FlowJobName, partition.PartitionId) + executor := c.NewQRepQueryExecutorSnapshot(config.SnapshotName, config.FlowJobName, partition.PartitionId) numRecords, err := executor.ExecuteQueryIntoSink(ctx, sink, query, rangeStart, rangeEnd) if err != nil { @@ -683,8 +679,7 @@ func pullXminRecordStream( queryArgs = []interface{}{strconv.FormatInt(partition.Range.Range.(*protos.PartitionRange_IntRange).IntRange.Start&0xffffffff, 10)} } - executor := c.NewQRepQueryExecutorSnapshot(c.config.TransactionSnapshot, - config.FlowJobName, partition.PartitionId) + executor := c.NewQRepQueryExecutorSnapshot(config.SnapshotName, config.FlowJobName, partition.PartitionId) numRecords, currentSnapshotXmin, err := executor.ExecuteQueryIntoSinkGettingCurrentSnapshotXmin( ctx, diff --git a/flow/connectors/snowflake/qrep.go b/flow/connectors/snowflake/qrep.go index ee3d12c5c7..22c780882f 100644 --- a/flow/connectors/snowflake/qrep.go +++ b/flow/connectors/snowflake/qrep.go @@ -140,10 +140,7 @@ func (c *SnowflakeConnector) createExternalStage(ctx context.Context, stageName cleanURL := fmt.Sprintf("s3://%s/%s/%s", s3o.Bucket, s3o.Prefix, config.FlowJobName) - var s3Int string - if config.DestinationPeer != nil { - s3Int = config.DestinationPeer.GetSnowflakeConfig().S3Integration - } + s3Int := c.config.S3Integration provider, err := utils.GetAWSCredentialsProvider(ctx, "snowflake", utils.PeerAWSCredentials{}) if err != nil { return "", err diff --git a/flow/connectors/snowflake/snowflake.go b/flow/connectors/snowflake/snowflake.go index 83f2e504f0..ec897df673 100644 --- a/flow/connectors/snowflake/snowflake.go +++ b/flow/connectors/snowflake/snowflake.go @@ -81,6 +81,7 @@ type SnowflakeConnector struct { *metadataStore.PostgresMetadata database *sql.DB logger log.Logger + config *protos.SnowflakeConfig rawSchema string } @@ -222,6 +223,7 @@ func NewSnowflakeConnector( database: database, rawSchema: rawSchema, logger: logger, + config: snowflakeProtoConfig, }, nil } diff --git a/flow/connectors/utils/peers.go 
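With setTransactionSnapshot now taking the snapshot name explicitly, the snapshot travels on the QRep config rather than living in connector state. A hedged sketch of the call-site shape (the transaction options and variable names are assumptions for illustration, not taken from this patch):

    // Hedged sketch: open a read-only repeatable-read transaction and pin it
    // to the snapshot name carried on the QRep config.
    tx, err := conn.BeginTx(ctx, pgx.TxOptions{
        IsoLevel:   pgx.RepeatableRead,
        AccessMode: pgx.ReadOnly,
    })
    if err != nil {
        return nil, fmt.Errorf("failed to begin transaction: %w", err)
    }
    defer shared.RollbackTx(tx, c.logger)

    if err := c.setTransactionSnapshot(ctx, tx, config.SnapshotName); err != nil {
        return nil, fmt.Errorf("failed to set transaction snapshot: %w", err)
    }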
b/flow/connectors/utils/peers.go index 92947e80fa..b871e60373 100644 --- a/flow/connectors/utils/peers.go +++ b/flow/connectors/utils/peers.go @@ -17,88 +17,78 @@ func CreatePeerNoValidate( peer *protos.Peer, ) (*protos.CreatePeerResponse, error) { config := peer.Config + peerType := peer.Type wrongConfigResponse := &protos.CreatePeerResponse{ Status: protos.CreatePeerStatus_FAILED, Message: fmt.Sprintf("invalid config for %s peer %s", - peer.Type, peer.Name), + peerType, peer.Name), } - var encodedConfig []byte - var encodingErr error - peerType := peer.Type + var innerConfig proto.Message switch peerType { case protos.DBType_POSTGRES: pgConfigObject, ok := config.(*protos.Peer_PostgresConfig) if !ok { return wrongConfigResponse, nil } - pgConfig := pgConfigObject.PostgresConfig - encodedConfig, encodingErr = proto.Marshal(pgConfig) + innerConfig = pgConfigObject.PostgresConfig case protos.DBType_SNOWFLAKE: sfConfigObject, ok := config.(*protos.Peer_SnowflakeConfig) if !ok { return wrongConfigResponse, nil } - sfConfig := sfConfigObject.SnowflakeConfig - encodedConfig, encodingErr = proto.Marshal(sfConfig) + innerConfig = sfConfigObject.SnowflakeConfig case protos.DBType_BIGQUERY: bqConfigObject, ok := config.(*protos.Peer_BigqueryConfig) if !ok { return wrongConfigResponse, nil } - bqConfig := bqConfigObject.BigqueryConfig - encodedConfig, encodingErr = proto.Marshal(bqConfig) + innerConfig = bqConfigObject.BigqueryConfig case protos.DBType_SQLSERVER: sqlServerConfigObject, ok := config.(*protos.Peer_SqlserverConfig) if !ok { return wrongConfigResponse, nil } - sqlServerConfig := sqlServerConfigObject.SqlserverConfig - encodedConfig, encodingErr = proto.Marshal(sqlServerConfig) + innerConfig = sqlServerConfigObject.SqlserverConfig case protos.DBType_S3: s3ConfigObject, ok := config.(*protos.Peer_S3Config) if !ok { return wrongConfigResponse, nil } - s3Config := s3ConfigObject.S3Config - encodedConfig, encodingErr = proto.Marshal(s3Config) + innerConfig = s3ConfigObject.S3Config case protos.DBType_CLICKHOUSE: chConfigObject, ok := config.(*protos.Peer_ClickhouseConfig) if !ok { return wrongConfigResponse, nil } - chConfig := chConfigObject.ClickhouseConfig - encodedConfig, encodingErr = proto.Marshal(chConfig) + innerConfig = chConfigObject.ClickhouseConfig case protos.DBType_KAFKA: kaConfigObject, ok := config.(*protos.Peer_KafkaConfig) if !ok { return wrongConfigResponse, nil } - kaConfig := kaConfigObject.KafkaConfig - encodedConfig, encodingErr = proto.Marshal(kaConfig) + innerConfig = kaConfigObject.KafkaConfig case protos.DBType_PUBSUB: psConfigObject, ok := config.(*protos.Peer_PubsubConfig) if !ok { return wrongConfigResponse, nil } - psConfig := psConfigObject.PubsubConfig - encodedConfig, encodingErr = proto.Marshal(psConfig) + innerConfig = psConfigObject.PubsubConfig case protos.DBType_EVENTHUBS: ehConfigObject, ok := config.(*protos.Peer_EventhubGroupConfig) if !ok { return wrongConfigResponse, nil } - ehConfig := ehConfigObject.EventhubGroupConfig - encodedConfig, encodingErr = proto.Marshal(ehConfig) + innerConfig = ehConfigObject.EventhubGroupConfig case protos.DBType_ELASTICSEARCH: esConfigObject, ok := config.(*protos.Peer_ElasticsearchConfig) if !ok { return wrongConfigResponse, nil } - esConfig := esConfigObject.ElasticsearchConfig - encodedConfig, encodingErr = proto.Marshal(esConfig) + innerConfig = esConfigObject.ElasticsearchConfig default: return wrongConfigResponse, nil } + encodedConfig, encodingErr := proto.Marshal(innerConfig) if encodingErr != nil { 
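The CreatePeerNoValidate refactor above collapses nine near-identical Marshal calls into one: each case now only narrows the config oneof into a proto.Message, and encoding happens on a single path at the end. Distilled from the hunk above, with the remaining cases abbreviated:

    // Each case only selects the typed config into innerConfig; no per-case Marshal.
    var innerConfig proto.Message
    switch peerType {
    case protos.DBType_POSTGRES:
        pgConfigObject, ok := config.(*protos.Peer_PostgresConfig)
        if !ok {
            return wrongConfigResponse, nil
        }
        innerConfig = pgConfigObject.PostgresConfig
    // ... remaining peer types follow the same shape ...
    default:
        return wrongConfigResponse, nil
    }

    // One encoding path for every peer type.
    encodedConfig, encodingErr := proto.Marshal(innerConfig)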
slog.Error(fmt.Sprintf("failed to encode peer configuration for %s peer %s : %v", peer.Type, peer.Name, encodingErr)) diff --git a/flow/e2e/bigquery/bigquery.go b/flow/e2e/bigquery/bigquery.go index ae57c0a449..1a6e022aea 100644 --- a/flow/e2e/bigquery/bigquery.go +++ b/flow/e2e/bigquery/bigquery.go @@ -46,7 +46,16 @@ func (s PeerFlowE2ETestSuiteBQ) Suffix() string { } func (s PeerFlowE2ETestSuiteBQ) Peer() *protos.Peer { - return s.bqHelper.Peer + s.t.Helper() + ret := &protos.Peer{ + Name: e2e.AddSuffix(s, "test_bq_peer"), + Type: protos.DBType_BIGQUERY, + Config: &protos.Peer_BigqueryConfig{ + BigqueryConfig: s.bqHelper.Config, + }, + } + e2e.CreatePeer(s.t, ret) + return ret } func (s PeerFlowE2ETestSuiteBQ) DestinationTable(table string) string { @@ -89,7 +98,7 @@ func SetupSuite(t *testing.T) PeerFlowE2ETestSuiteBQ { t.Fatalf("failed to setup postgres: %v", err) } - bqHelper, err := NewBigQueryTestHelper() + bqHelper, err := NewBigQueryTestHelper(t) if err != nil { t.Fatalf("Failed to create helper: %v", err) } diff --git a/flow/e2e/bigquery/bigquery_helper.go b/flow/e2e/bigquery/bigquery_helper.go index 6138c036e0..1ee303acf8 100644 --- a/flow/e2e/bigquery/bigquery_helper.go +++ b/flow/e2e/bigquery/bigquery_helper.go @@ -8,6 +8,7 @@ import ( "math/big" "os" "strings" + "testing" "time" "cloud.google.com/go/bigquery" @@ -26,8 +27,6 @@ import ( type BigQueryTestHelper struct { // config is the BigQuery config. Config *protos.BigqueryConfig - // peer struct holder BigQuery - Peer *protos.Peer // client to talk to BigQuery client *bigquery.Client // runID uniquely identifies the test run to namespace stateful schemas. @@ -35,7 +34,8 @@ type BigQueryTestHelper struct { } // NewBigQueryTestHelper creates a new BigQueryTestHelper. -func NewBigQueryTestHelper() (*BigQueryTestHelper, error) { +func NewBigQueryTestHelper(t *testing.T) (*BigQueryTestHelper, error) { + t.Helper() // random 64 bit int to namespace stateful schemas. runID, err := shared.RandomUInt64() if err != nil { @@ -53,8 +53,7 @@ func NewBigQueryTestHelper() (*BigQueryTestHelper, error) { } var config *protos.BigqueryConfig - err = json.Unmarshal(content, &config) - if err != nil { + if err := json.Unmarshal(content, &config); err != nil { return nil, fmt.Errorf("failed to unmarshal json: %w", err) } @@ -71,28 +70,13 @@ func NewBigQueryTestHelper() (*BigQueryTestHelper, error) { return nil, fmt.Errorf("failed to create helper BigQuery client: %v", err) } - peer := generateBQPeer(config) - return &BigQueryTestHelper{ runID: runID, Config: config, client: client, - Peer: peer, }, nil } -func generateBQPeer(bigQueryConfig *protos.BigqueryConfig) *protos.Peer { - ret := &protos.Peer{} - ret.Name = "test_bq_peer" - ret.Type = protos.DBType_BIGQUERY - - ret.Config = &protos.Peer_BigqueryConfig{ - BigqueryConfig: bigQueryConfig, - } - - return ret -} - // datasetExists checks if the dataset exists. 
func (b *BigQueryTestHelper) datasetExists(dataset *bigquery.Dataset) (bool, error) { meta, err := dataset.Metadata(context.Background()) diff --git a/flow/e2e/bigquery/peer_flow_bq_test.go b/flow/e2e/bigquery/peer_flow_bq_test.go index 506152aeed..1b1426b54f 100644 --- a/flow/e2e/bigquery/peer_flow_bq_test.go +++ b/flow/e2e/bigquery/peer_flow_bq_test.go @@ -112,10 +112,10 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Complete_Flow_No_Data() { connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: s.attachSuffix("test_complete_flow_no_data"), TableNameMapping: map[string]string{srcTableName: dstTableName}, - Destination: s.bqHelper.Peer, + Destination: s.Peer().Name, } - flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs(s.t) flowConnConfig.MaxBatchSize = 1 env := e2e.ExecutePeerflow(tc, peerflow.CDCFlowWorkflow, flowConnConfig, nil) @@ -143,10 +143,10 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Char_ColType_Error() { connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: s.attachSuffix("test_char_table"), TableNameMapping: map[string]string{srcTableName: dstTableName}, - Destination: s.bqHelper.Peer, + Destination: s.Peer().Name, } - flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs(s.t) flowConnConfig.MaxBatchSize = 1 env := e2e.ExecutePeerflow(tc, peerflow.CDCFlowWorkflow, flowConnConfig, nil) @@ -175,10 +175,10 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Toast_BQ() { connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: s.attachSuffix("test_toast_bq_1"), TableNameMapping: map[string]string{srcTableName: dstTableName}, - Destination: s.bqHelper.Peer, + Destination: s.Peer().Name, } - flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs(s.t) flowConnConfig.MaxBatchSize = 100 // wait for PeerFlowStatusQuery to finish setup @@ -226,10 +226,10 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Toast_Advance_1_BQ() { connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: s.attachSuffix("test_toast_bq_3"), TableNameMapping: map[string]string{srcTableName: dstTableName}, - Destination: s.bqHelper.Peer, + Destination: s.Peer().Name, } - flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs(s.t) flowConnConfig.MaxBatchSize = 100 // wait for PeerFlowStatusQuery to finish setup @@ -282,10 +282,10 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Toast_Advance_2_BQ() { connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: s.attachSuffix("test_toast_bq_4"), TableNameMapping: map[string]string{srcTableName: dstTableName}, - Destination: s.bqHelper.Peer, + Destination: s.Peer().Name, } - flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs(s.t) flowConnConfig.MaxBatchSize = 100 // wait for PeerFlowStatusQuery to finish setup @@ -332,10 +332,10 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Toast_Advance_3_BQ() { connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: s.attachSuffix("test_toast_bq_5"), TableNameMapping: map[string]string{srcTableName: dstTableName}, - Destination: s.bqHelper.Peer, + Destination: s.Peer().Name, } - flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs(s.t) flowConnConfig.MaxBatchSize = 100 // wait for 
PeerFlowStatusQuery to finish setup @@ -388,10 +388,10 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Types_BQ() { connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: s.attachSuffix("test_types_bq"), TableNameMapping: map[string]string{srcTableName: dstTableName}, - Destination: s.bqHelper.Peer, + Destination: s.Peer().Name, } - flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs(s.t) flowConnConfig.MaxBatchSize = 100 // wait for PeerFlowStatusQuery to finish setup @@ -470,10 +470,10 @@ func (s PeerFlowE2ETestSuiteBQ) Test_NaN_Doubles_BQ() { connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: s.attachSuffix("test_nans_bq"), TableNameMapping: map[string]string{srcTableName: dstTableName}, - Destination: s.bqHelper.Peer, + Destination: s.Peer().Name, } - flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs(s.t) flowConnConfig.MaxBatchSize = 100 // wait for PeerFlowStatusQuery to finish setup @@ -522,10 +522,10 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Invalid_Geo_BQ_Avro_CDC() { connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: s.attachSuffix("test_invalid_geo_bq_avro_cdc"), TableNameMapping: map[string]string{srcTableName: dstTableName}, - Destination: s.bqHelper.Peer, + Destination: s.Peer().Name, } - flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs(s.t) flowConnConfig.MaxBatchSize = 100 // wait for PeerFlowStatusQuery to finish setup @@ -598,10 +598,10 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Multi_Table_BQ() { connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: s.attachSuffix("test_multi_table_bq"), TableNameMapping: map[string]string{srcTable1Name: dstTable1Name, srcTable2Name: dstTable2Name}, - Destination: s.bqHelper.Peer, + Destination: s.Peer().Name, } - flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs(s.t) flowConnConfig.MaxBatchSize = 100 // wait for PeerFlowStatusQuery to finish setup @@ -652,10 +652,10 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Simple_Schema_Changes_BQ() { connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: s.attachSuffix(tableName), TableNameMapping: map[string]string{srcTableName: tableName}, - Destination: s.bqHelper.Peer, + Destination: s.Peer().Name, } - flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs(s.t) flowConnConfig.MaxBatchSize = 100 // wait for PeerFlowStatusQuery to finish setup @@ -730,10 +730,10 @@ func (s PeerFlowE2ETestSuiteBQ) Test_All_Types_Schema_Changes_BQ() { connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: s.attachSuffix(tableName), TableNameMapping: map[string]string{srcTableName: tableName}, - Destination: s.bqHelper.Peer, + Destination: s.Peer().Name, } - flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs(s.t) flowConnConfig.MaxBatchSize = 100 // wait for PeerFlowStatusQuery to finish setup @@ -798,10 +798,10 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Composite_PKey_BQ() { connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: s.attachSuffix("test_cpkey_flow"), TableNameMapping: map[string]string{srcTableName: tableName}, - Destination: s.bqHelper.Peer, + Destination: s.Peer().Name, } - 
flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs(s.t) flowConnConfig.MaxBatchSize = 100 // wait for PeerFlowStatusQuery to finish setup @@ -854,10 +854,10 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Composite_PKey_Toast_1_BQ() { connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: s.attachSuffix("test_cpkey_toast1_flow"), TableNameMapping: map[string]string{srcTableName: dstTableName}, - Destination: s.bqHelper.Peer, + Destination: s.Peer().Name, } - flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs(s.t) flowConnConfig.MaxBatchSize = 100 flowConnConfig.SoftDelete = false flowConnConfig.SoftDeleteColName = "" @@ -915,10 +915,10 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Composite_PKey_Toast_2_BQ() { connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: s.attachSuffix("test_cpkey_toast2_flow"), TableNameMapping: map[string]string{srcTableName: tableName}, - Destination: s.bqHelper.Peer, + Destination: s.Peer().Name, } - flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs(s.t) flowConnConfig.MaxBatchSize = 100 // wait for PeerFlowStatusQuery to finish setup @@ -965,11 +965,11 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Columns_BQ() { connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: s.attachSuffix("test_peerdb_cols_mirror"), TableNameMapping: map[string]string{srcTableName: dstTableName}, - Destination: s.bqHelper.Peer, + Destination: s.Peer().Name, SoftDelete: true, } - flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs(s.t) flowConnConfig.MaxBatchSize = 100 env := e2e.ExecutePeerflow(tc, peerflow.CDCFlowWorkflow, flowConnConfig, nil) @@ -1016,10 +1016,10 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Multi_Table_Multi_Dataset_BQ() { srcTable1Name: dstTable1Name, srcTable2Name: fmt.Sprintf("%s.%s", secondDataset, dstTable2Name), }, - Destination: s.bqHelper.Peer, + Destination: s.Peer().Name, } - flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs(s.t) flowConnConfig.MaxBatchSize = 100 // wait for PeerFlowStatusQuery to finish setup @@ -1071,15 +1071,15 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Soft_Delete_Basic() { require.NoError(s.t, err) config := &protos.FlowConnectionConfigs{ - FlowJobName: s.attachSuffix(tableName), - Destination: s.bqHelper.Peer, + FlowJobName: s.attachSuffix(tableName), + DestinationName: s.Peer().Name, TableMappings: []*protos.TableMapping{ { SourceTableIdentifier: srcTableName, DestinationTableIdentifier: tableName, }, }, - Source: e2e.GeneratePostgresPeer(), + SourceName: e2e.GeneratePostgresPeer(s.t).Name, SoftDelete: true, SoftDeleteColName: "_PEERDB_IS_DELETED", SyncedAtColName: "_PEERDB_SYNCED_AT", @@ -1091,16 +1091,14 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Soft_Delete_Basic() { env := e2e.ExecutePeerflow(tc, peerflow.CDCFlowWorkflow, config, nil) e2e.SetupCDCFlowStatusQuery(s.t, env, config) - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` - INSERT INTO %s(c1,c2,t) VALUES (1,2,random_string(9000))`, srcTableName)) + _, err = s.Conn().Exec(context.Background(), + fmt.Sprintf(`INSERT INTO %s(c1,c2,t) VALUES (1,2,random_string(9000))`, srcTableName)) e2e.EnvNoError(s.t, env, err) e2e.EnvWaitForEqualTablesWithNames(env, s, "normalize insert", 
srcName, tableName, "id,c1,c2,t") - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` - UPDATE %s SET c1=c1+4 WHERE id=1`, srcTableName)) + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(`UPDATE %s SET c1=c1+4 WHERE id=1`, srcTableName)) e2e.EnvNoError(s.t, env, err) e2e.EnvWaitForEqualTablesWithNames(env, s, "normalize update", srcName, tableName, "id,c1,c2,t") - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` - DELETE FROM %s WHERE id=1`, srcTableName)) + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(`DELETE FROM %s WHERE id=1`, srcTableName)) e2e.EnvNoError(s.t, env, err) e2e.EnvWaitFor(s.t, env, 3*time.Minute, "normalize delete", func() bool { pgRows, err := e2e.GetPgRows(s.conn, s.bqSuffix, srcName, "id,c1,c2,t") @@ -1143,15 +1141,15 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Soft_Delete_IUD_Same_Batch() { require.NoError(s.t, err) config := &protos.FlowConnectionConfigs{ - FlowJobName: s.attachSuffix("test_softdel_iud"), - Destination: s.bqHelper.Peer, + FlowJobName: s.attachSuffix("test_softdel_iud"), + DestinationName: s.Peer().Name, TableMappings: []*protos.TableMapping{ { SourceTableIdentifier: srcTableName, DestinationTableIdentifier: dstTableName, }, }, - Source: e2e.GeneratePostgresPeer(), + SourceName: e2e.GeneratePostgresPeer(s.t).Name, SoftDelete: true, SoftDeleteColName: "_custom_deleted", SyncedAtColName: "_custom_synced", @@ -1166,18 +1164,16 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Soft_Delete_IUD_Same_Batch() { insertTx, err := s.Conn().Begin(context.Background()) e2e.EnvNoError(s.t, env, err) - _, err = insertTx.Exec(context.Background(), fmt.Sprintf(` - INSERT INTO %s(c1,c2,t) VALUES (1,2,random_string(9000))`, srcTableName)) + _, err = insertTx.Exec(context.Background(), + fmt.Sprintf("INSERT INTO %s(c1,c2,t) VALUES (1,2,random_string(9000))", srcTableName)) e2e.EnvNoError(s.t, env, err) - _, err = insertTx.Exec(context.Background(), fmt.Sprintf(` - UPDATE %s SET c1=c1+4 WHERE id=1`, srcTableName)) + _, err = insertTx.Exec(context.Background(), fmt.Sprintf("UPDATE %s SET c1=c1+4 WHERE id=1", srcTableName)) e2e.EnvNoError(s.t, env, err) // since we delete stuff, create another table to compare with - _, err = insertTx.Exec(context.Background(), fmt.Sprintf(` - CREATE TABLE %s AS SELECT * FROM %s`, cmpTableName, srcTableName)) + _, err = insertTx.Exec(context.Background(), + fmt.Sprintf("CREATE TABLE %s AS SELECT * FROM %s", cmpTableName, srcTableName)) e2e.EnvNoError(s.t, env, err) - _, err = insertTx.Exec(context.Background(), fmt.Sprintf(` - DELETE FROM %s WHERE id=1`, srcTableName)) + _, err = insertTx.Exec(context.Background(), fmt.Sprintf("DELETE FROM %s WHERE id=1", srcTableName)) e2e.EnvNoError(s.t, env, err) e2e.EnvNoError(s.t, env, insertTx.Commit(context.Background())) @@ -1213,15 +1209,15 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Soft_Delete_UD_Same_Batch() { require.NoError(s.t, err) config := &protos.FlowConnectionConfigs{ - FlowJobName: s.attachSuffix(dstName), - Destination: s.bqHelper.Peer, + FlowJobName: s.attachSuffix(dstName), + DestinationName: s.Peer().Name, TableMappings: []*protos.TableMapping{ { SourceTableIdentifier: srcTableName, DestinationTableIdentifier: dstName, }, }, - Source: e2e.GeneratePostgresPeer(), + SourceName: e2e.GeneratePostgresPeer(s.t).Name, SoftDelete: true, SoftDeleteColName: "_PEERDB_IS_DELETED", SyncedAtColName: "_PEERDB_SYNCED_AT", @@ -1233,21 +1229,21 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Soft_Delete_UD_Same_Batch() { env := e2e.ExecutePeerflow(tc, peerflow.CDCFlowWorkflow, config, 
nil) e2e.SetupCDCFlowStatusQuery(s.t, env, config) - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` - INSERT INTO %s(c1,c2,t) VALUES (1,2,random_string(9000))`, srcTableName)) + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf( + "INSERT INTO %s(c1,c2,t) VALUES (1,2,random_string(9000))", srcTableName)) e2e.EnvNoError(s.t, env, err) e2e.EnvWaitForEqualTablesWithNames(env, s, "normalize insert", srcName, dstName, "id,c1,c2,t") insertTx, err := s.Conn().Begin(context.Background()) e2e.EnvNoError(s.t, env, err) - _, err = insertTx.Exec(context.Background(), fmt.Sprintf(` - UPDATE %s SET t=random_string(10000) WHERE id=1`, srcTableName)) + _, err = insertTx.Exec(context.Background(), fmt.Sprintf( + "UPDATE %s SET t=random_string(10000) WHERE id=1", srcTableName)) e2e.EnvNoError(s.t, env, err) - _, err = insertTx.Exec(context.Background(), fmt.Sprintf(` - UPDATE %s SET c1=c1+4 WHERE id=1`, srcTableName)) + _, err = insertTx.Exec(context.Background(), fmt.Sprintf( + "UPDATE %s SET c1=c1+4 WHERE id=1", srcTableName)) e2e.EnvNoError(s.t, env, err) - _, err = insertTx.Exec(context.Background(), fmt.Sprintf(` - DELETE FROM %s WHERE id=1`, srcTableName)) + _, err = insertTx.Exec(context.Background(), fmt.Sprintf( + "DELETE FROM %s WHERE id=1", srcTableName)) e2e.EnvNoError(s.t, env, err) e2e.EnvNoError(s.t, env, insertTx.Commit(context.Background())) @@ -1289,15 +1285,15 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Soft_Delete_Insert_After_Delete() { require.NoError(s.t, err) config := &protos.FlowConnectionConfigs{ - FlowJobName: s.attachSuffix(tableName), - Destination: s.bqHelper.Peer, + FlowJobName: s.attachSuffix(tableName), + DestinationName: s.Peer().Name, TableMappings: []*protos.TableMapping{ { SourceTableIdentifier: srcTableName, DestinationTableIdentifier: tableName, }, }, - Source: e2e.GeneratePostgresPeer(), + SourceName: e2e.GeneratePostgresPeer(s.t).Name, SoftDelete: true, SoftDeleteColName: "_PEERDB_IS_DELETED", SyncedAtColName: "_PEERDB_SYNCED_AT", @@ -1309,12 +1305,12 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Soft_Delete_Insert_After_Delete() { env := e2e.ExecutePeerflow(tc, peerflow.CDCFlowWorkflow, config, nil) e2e.SetupCDCFlowStatusQuery(s.t, env, config) - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` - INSERT INTO %s(c1,c2,t) VALUES (1,2,random_string(9000))`, srcTableName)) + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf( + "INSERT INTO %s(c1,c2,t) VALUES (1,2,random_string(9000))", srcTableName)) e2e.EnvNoError(s.t, env, err) e2e.EnvWaitForEqualTables(env, s, "normalize insert", tableName, "id,c1,c2,t") - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` - DELETE FROM %s WHERE id=1`, srcTableName)) + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf( + "DELETE FROM %s WHERE id=1", srcTableName)) e2e.EnvNoError(s.t, env, err) e2e.EnvWaitFor(s.t, env, 3*time.Minute, "normalize delete", func() bool { pgRows, err := e2e.GetPgRows(s.conn, s.bqSuffix, tableName, "id,c1,c2,t") @@ -1327,8 +1323,8 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Soft_Delete_Insert_After_Delete() { } return e2eshared.CheckEqualRecordBatches(s.t, pgRows, rows) }) - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` - INSERT INTO %s(id,c1,c2,t) VALUES (1,3,4,random_string(10000))`, srcTableName)) + _, err = s.Conn().Exec(context.Background(), fmt.Sprintf( + "INSERT INTO %s(id,c1,c2,t) VALUES (1,3,4,random_string(10000))", srcTableName)) e2e.EnvNoError(s.t, env, err) e2e.EnvWaitForEqualTables(env, s, "normalize reinsert", tableName, "id,c1,c2,t") @@ 
-1357,18 +1353,17 @@ func (s PeerFlowE2ETestSuiteBQ) Test_JSON_PKey_BQ() { `, srcTableName)) require.NoError(s.t, err) - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` - ALTER TABLE %s REPLICA IDENTITY FULL - `, srcTableName)) + _, err = s.Conn().Exec(context.Background(), + fmt.Sprintf("ALTER TABLE %s REPLICA IDENTITY FULL", srcTableName)) require.NoError(s.t, err) connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: s.attachSuffix("test_json_pkey_flow"), TableNameMapping: map[string]string{srcTableName: dstTableName}, - Destination: s.bqHelper.Peer, + Destination: s.Peer().Name, } - flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs(s.t) flowConnConfig.MaxBatchSize = 100 flowConnConfig.SoftDelete = false flowConnConfig.SoftDeleteColName = "" @@ -1383,9 +1378,9 @@ func (s PeerFlowE2ETestSuiteBQ) Test_JSON_PKey_BQ() { testKey := fmt.Sprintf("test_key_%d", i) testValue := fmt.Sprintf("test_value_%d", i) testJson := `'{"name":"jack", "age":12, "spouse":null}'::json` - _, err = s.Conn().Exec(context.Background(), fmt.Sprintf(` - INSERT INTO %s(key, value, j) VALUES ($1, $2, %s) - `, srcTableName, testJson), testKey, testValue) + _, err = s.Conn().Exec(context.Background(), + fmt.Sprintf("INSERT INTO %s(key, value, j) VALUES ($1, $2, %s)", srcTableName, testJson), + testKey, testValue) e2e.EnvNoError(s.t, env, err) } s.t.Log("Inserted 10 rows into the source table") diff --git a/flow/e2e/bigquery/qrep_flow_bq_test.go b/flow/e2e/bigquery/qrep_flow_bq_test.go index c75c1e57d6..d52fffb724 100644 --- a/flow/e2e/bigquery/qrep_flow_bq_test.go +++ b/flow/e2e/bigquery/qrep_flow_bq_test.go @@ -63,16 +63,15 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Complete_QRep_Flow_Avro() { query := fmt.Sprintf("SELECT * FROM e2e_test_%s.%s WHERE updated_at BETWEEN {{.start}} AND {{.end}}", s.bqSuffix, tblName) - qrepConfig, err := e2e.CreateQRepWorkflowConfig("test_qrep_flow_avro", + qrepConfig := e2e.CreateQRepWorkflowConfig(s.t, "test_qrep_flow_avro", fmt.Sprintf("e2e_test_%s.%s", s.bqSuffix, tblName), tblName, query, - s.bqHelper.Peer, + s.Peer().Name, "", true, "", "") - require.NoError(s.t, err) env := e2e.RunQRepFlowWorkflow(tc, qrepConfig) e2e.EnvWaitForFinished(s.t, env, 3*time.Minute) require.NoError(s.t, env.Error()) @@ -88,17 +87,16 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Invalid_Timestamps_And_Date_QRep() { query := fmt.Sprintf("SELECT * FROM e2e_test_%s.%s WHERE watermark_ts BETWEEN {{.start}} AND {{.end}}", s.bqSuffix, tblName) - qrepConfig, err := e2e.CreateQRepWorkflowConfig("test_invalid_time_bq", + qrepConfig := e2e.CreateQRepWorkflowConfig(s.t, "test_invalid_time_bq", fmt.Sprintf("e2e_test_%s.%s", s.bqSuffix, tblName), tblName, query, - s.bqHelper.Peer, + s.Peer().Name, "", true, "", "") qrepConfig.WatermarkColumn = "watermark_ts" - require.NoError(s.t, err) env := e2e.RunQRepFlowWorkflow(tc, qrepConfig) e2e.EnvWaitForFinished(s.t, env, 3*time.Minute) require.NoError(s.t, env.Error()) @@ -130,20 +128,18 @@ func (s PeerFlowE2ETestSuiteBQ) Test_PeerDB_Columns_QRep_BQ() { query := fmt.Sprintf("SELECT * FROM e2e_test_%s.%s WHERE updated_at BETWEEN {{.start}} AND {{.end}}", s.bqSuffix, tblName) - qrepConfig, err := e2e.CreateQRepWorkflowConfig("test_qrep_flow_avro", + qrepConfig := e2e.CreateQRepWorkflowConfig(s.t, "test_qrep_flow_avro", fmt.Sprintf("e2e_test_%s.%s", s.bqSuffix, tblName), tblName, query, - s.bqHelper.Peer, + s.Peer().Name, "", true, "_PEERDB_SYNCED_AT", "") - require.NoError(s.t, err) env 
:= e2e.RunQRepFlowWorkflow(tc, qrepConfig) e2e.EnvWaitForFinished(s.t, env, 3*time.Minute) require.NoError(s.t, env.Error()) - err = s.checkPeerdbColumns(tblName, false) - require.NoError(s.t, err) + require.NoError(s.t, s.checkPeerdbColumns(tblName, false)) } diff --git a/flow/e2e/congen.go b/flow/e2e/congen.go index 850cc03f8c..dcb4e86a3a 100644 --- a/flow/e2e/congen.go +++ b/flow/e2e/congen.go @@ -11,6 +11,7 @@ import ( "github.com/stretchr/testify/require" connpostgres "github.com/PeerDB-io/peer-flow/connectors/postgres" + "github.com/PeerDB-io/peer-flow/connectors/utils" "github.com/PeerDB-io/peer-flow/generated/protos" "github.com/PeerDB-io/peer-flow/peerdbenv" ) @@ -112,14 +113,12 @@ func SetupPostgres(t *testing.T, suffix string) (*connpostgres.PostgresConnector } conn := connector.Conn() - err = cleanPostgres(conn, suffix) - if err != nil { + if err := cleanPostgres(conn, suffix); err != nil { connector.Close() return nil, err } - err = setupPostgresSchema(t, conn, suffix) - if err != nil { + if err := setupPostgresSchema(t, conn, suffix); err != nil { connector.Close() return nil, err } @@ -150,20 +149,23 @@ func TearDownPostgres[T Suite](s T) { } // GeneratePostgresPeer generates a postgres peer config for testing. -func GeneratePostgresPeer() *protos.Peer { - return &protos.Peer{ - Name: "test_postgres_peer", +func GeneratePostgresPeer(t *testing.T) *protos.Peer { + t.Helper() + peer := &protos.Peer{ + Name: "catalog", Type: protos.DBType_POSTGRES, Config: &protos.Peer_PostgresConfig{ PostgresConfig: peerdbenv.GetCatalogPostgresConfigFromEnv(), }, } + CreatePeer(t, peer) + return peer } type FlowConnectionGenerationConfig struct { FlowJobName string TableNameMapping map[string]string - Destination *protos.Peer + Destination string TableMappings []*protos.TableMapping SoftDelete bool } @@ -182,7 +184,20 @@ func TableMappings(s GenericSuite, tables ...string) []*protos.TableMapping { return tm } -func (c *FlowConnectionGenerationConfig) GenerateFlowConnectionConfigs() *protos.FlowConnectionConfigs { +func CreatePeer(t *testing.T, peer *protos.Peer) { + t.Helper() + ctx := context.Background() + pool, err := peerdbenv.GetCatalogConnectionPoolFromEnv(ctx) + require.NoError(t, err) + res, err := utils.CreatePeerNoValidate(ctx, pool, peer) + require.NoError(t, err) + if res.Status != protos.CreatePeerStatus_CREATED { + require.Fail(t, res.Message) + } +} + +func (c *FlowConnectionGenerationConfig) GenerateFlowConnectionConfigs(t *testing.T) *protos.FlowConnectionConfigs { + t.Helper() tblMappings := c.TableMappings if tblMappings == nil { for k, v := range c.TableNameMapping { @@ -196,8 +211,8 @@ func (c *FlowConnectionGenerationConfig) GenerateFlowConnectionConfigs() *protos ret := &protos.FlowConnectionConfigs{ FlowJobName: c.FlowJobName, TableMappings: tblMappings, - Source: GeneratePostgresPeer(), - Destination: c.Destination, + SourceName: GeneratePostgresPeer(t).Name, + DestinationName: c.Destination, SoftDelete: c.SoftDelete, SyncedAtColName: "_PEERDB_SYNCED_AT", IdleTimeoutSeconds: 15, @@ -207,32 +222,3 @@ func (c *FlowConnectionGenerationConfig) GenerateFlowConnectionConfigs() *protos } return ret } - -type QRepFlowConnectionGenerationConfig struct { - FlowJobName string - WatermarkTable string - DestinationTableIdentifier string - Destination *protos.Peer - StagingPath string - PostgresPort uint16 -} - -// GenerateQRepConfig generates a qrep config for testing. 
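With congen.go now registering peers in the catalog (GeneratePostgresPeer and the new CreatePeer helper) and flow configs referencing peers by name, the e2e tests above all follow the same wiring. A hedged composite sketch of that pattern (suite, table, and mirror names are illustrative):

    // Hedged sketch of the post-refactor test wiring: the destination peer is
    // created in the catalog by s.Peer(), and only its name goes on the config.
    connectionGen := e2e.FlowConnectionGenerationConfig{
        FlowJobName:      e2e.AddSuffix(s, "example_mirror"),
        TableNameMapping: map[string]string{srcTableName: dstTableName},
        Destination:      s.Peer().Name,
    }
    flowConnConfig := connectionGen.GenerateFlowConnectionConfigs(s.t)
    flowConnConfig.MaxBatchSize = 100

    env := e2e.ExecutePeerflow(tc, peerflow.CDCFlowWorkflow, flowConnConfig, nil)
    e2e.SetupCDCFlowStatusQuery(s.t, env, flowConnConfig)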
-func (c *QRepFlowConnectionGenerationConfig) GenerateQRepConfig( - query string, watermark string, -) *protos.QRepConfig { - return &protos.QRepConfig{ - FlowJobName: c.FlowJobName, - WatermarkTable: c.WatermarkTable, - DestinationTableIdentifier: c.DestinationTableIdentifier, - SourcePeer: GeneratePostgresPeer(), - DestinationPeer: c.Destination, - Query: query, - WatermarkColumn: watermark, - StagingPath: c.StagingPath, - WriteMode: &protos.QRepWriteMode{ - WriteType: protos.QRepWriteType_QREP_WRITE_MODE_APPEND, - }, - NumRowsPerPartition: 1000, - } -} diff --git a/flow/e2e/elasticsearch/elasticsearch.go b/flow/e2e/elasticsearch/elasticsearch.go index 9a338c2969..720cfdd62a 100644 --- a/flow/e2e/elasticsearch/elasticsearch.go +++ b/flow/e2e/elasticsearch/elasticsearch.go @@ -17,11 +17,11 @@ import ( ) type elasticsearchSuite struct { - t *testing.T - conn *connpostgres.PostgresConnector - esClient *elasticsearch.TypedClient - peer *protos.Peer - suffix string + t *testing.T + conn *connpostgres.PostgresConnector + esClient *elasticsearch.TypedClient + suffix string + esAddresses []string } func (s elasticsearchSuite) T() *testing.T { @@ -52,24 +52,13 @@ func SetupSuite(t *testing.T) elasticsearchSuite { }) require.NoError(t, err, "failed to setup elasticsearch") - suite := elasticsearchSuite{ - t: t, - conn: conn, - esClient: esClient, - suffix: suffix, + return elasticsearchSuite{ + t: t, + conn: conn, + esClient: esClient, + esAddresses: esAddresses, + suffix: suffix, } - suite.peer = &protos.Peer{ - Name: e2e.AddSuffix(suite, "elasticsearch"), - Type: protos.DBType_ELASTICSEARCH, - Config: &protos.Peer_ElasticsearchConfig{ - ElasticsearchConfig: &protos.ElasticsearchConfig{ - Addresses: esAddresses, - AuthType: protos.ElasticsearchAuthType_NONE, - }, - }, - } - - return suite } func (s elasticsearchSuite) Teardown() { @@ -77,7 +66,18 @@ func (s elasticsearchSuite) Teardown() { } func (s elasticsearchSuite) Peer() *protos.Peer { - return s.peer + ret := &protos.Peer{ + Name: e2e.AddSuffix(s, "elasticsearch"), + Type: protos.DBType_ELASTICSEARCH, + Config: &protos.Peer_ElasticsearchConfig{ + ElasticsearchConfig: &protos.ElasticsearchConfig{ + Addresses: s.esAddresses, + AuthType: protos.ElasticsearchAuthType_NONE, + }, + }, + } + e2e.CreatePeer(s.t, ret) + return ret } func (s elasticsearchSuite) countDocumentsInIndex(index string) int64 { diff --git a/flow/e2e/elasticsearch/peer_flow_es_test.go b/flow/e2e/elasticsearch/peer_flow_es_test.go index cec581a6be..0719ae40ab 100644 --- a/flow/e2e/elasticsearch/peer_flow_es_test.go +++ b/flow/e2e/elasticsearch/peer_flow_es_test.go @@ -28,9 +28,9 @@ func (s elasticsearchSuite) Test_Simple_PKey_CDC_Mirror() { connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: e2e.AddSuffix(s, "es_simple_pkey_cdc"), TableNameMapping: map[string]string{srcTableName: srcTableName}, - Destination: s.peer, + Destination: s.Peer().Name, } - flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs(s.t) flowConnConfig.MaxBatchSize = 100 flowConnConfig.DoInitialSnapshot = true @@ -97,9 +97,9 @@ func (s elasticsearchSuite) Test_Composite_PKey_CDC_Mirror() { connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: e2e.AddSuffix(s, "es_composite_pkey_cdc"), TableNameMapping: map[string]string{srcTableName: srcTableName}, - Destination: s.peer, + Destination: s.Peer().Name, } - flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() + flowConnConfig := 
connectionGen.GenerateFlowConnectionConfigs(s.t) flowConnConfig.MaxBatchSize = 100 flowConnConfig.DoInitialSnapshot = true diff --git a/flow/e2e/elasticsearch/qrep_flow_es_test.go b/flow/e2e/elasticsearch/qrep_flow_es_test.go index 85ac99043e..2839d60270 100644 --- a/flow/e2e/elasticsearch/qrep_flow_es_test.go +++ b/flow/e2e/elasticsearch/qrep_flow_es_test.go @@ -43,11 +43,11 @@ func (s elasticsearchSuite) Test_Simple_QRep_Append() { query := fmt.Sprintf("SELECT * FROM %s WHERE updated_at BETWEEN {{.start}} AND {{.end}}", srcTableName) - qrepConfig, err := e2e.CreateQRepWorkflowConfig("test_es_simple_qrep", + qrepConfig := e2e.CreateQRepWorkflowConfig(s.t, "test_es_simple_qrep", srcTableName, srcTableName, query, - s.peer, + s.Peer().Name, "", false, "", @@ -55,7 +55,6 @@ func (s elasticsearchSuite) Test_Simple_QRep_Append() { ) qrepConfig.InitialCopyOnly = false - require.NoError(s.t, err) env := e2e.RunQRepFlowWorkflow(tc, qrepConfig) e2e.EnvWaitFor(s.t, env, 10*time.Second, "waiting for ES to catch up", func() bool { @@ -97,11 +96,11 @@ func (s elasticsearchSuite) Test_Simple_QRep_Upsert() { query := fmt.Sprintf("SELECT * FROM %s WHERE updated_at BETWEEN {{.start}} AND {{.end}}", srcTableName) - qrepConfig, err := e2e.CreateQRepWorkflowConfig("test_es_simple_qrep", + qrepConfig := e2e.CreateQRepWorkflowConfig(s.t, "test_es_simple_qrep", srcTableName, srcTableName, query, - s.peer, + s.Peer().Name, "", false, "", @@ -113,7 +112,6 @@ func (s elasticsearchSuite) Test_Simple_QRep_Upsert() { } qrepConfig.InitialCopyOnly = false - require.NoError(s.t, err) env := e2e.RunQRepFlowWorkflow(tc, qrepConfig) e2e.EnvWaitFor(s.t, env, 10*time.Second, "waiting for ES to catch up", func() bool { diff --git a/flow/e2e/eventhub/peer_flow_eh_test.go b/flow/e2e/eventhub/peer_flow_eh_test.go index ba73e07eaf..3176b15dd7 100644 --- a/flow/e2e/eventhub/peer_flow_eh_test.go +++ b/flow/e2e/eventhub/peer_flow_eh_test.go @@ -65,7 +65,7 @@ func EventhubsCreds() (*protos.EventHubConfig, error) { } func (s EventhubsSuite) Peer(config *protos.EventHubConfig) *protos.Peer { - return &protos.Peer{ + ret := &protos.Peer{ Name: e2e.AddSuffix(s, "eventhubs"), Type: protos.DBType_EVENTHUBS, Config: &protos.Peer_EventhubGroupConfig{ @@ -84,6 +84,8 @@ func (s EventhubsSuite) Peer(config *protos.EventHubConfig) *protos.Peer { }, }, } + e2e.CreatePeer(s.t, ret) + return ret } func (s EventhubsSuite) GetEventhubName() string { @@ -141,12 +143,13 @@ func (s EventhubsSuite) Test_EH_Simple() { flowName := e2e.AddSuffix(s, "e2e_eh_simple") scopedEventhubName := fmt.Sprintf("%s.%s.id", ehCreds.Namespace, s.GetEventhubName()) + destinationPeer := s.Peer(ehCreds) connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: flowName, TableNameMapping: map[string]string{srcTableName: scopedEventhubName}, - Destination: s.Peer(ehCreds), + Destination: destinationPeer.Name, } - flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs(s.t) flowConnConfig.Script = "e2e_eh_simple_script" tc := e2e.NewTemporalClient(s.t) env := e2e.ExecutePeerflow(tc, peerflow.CDCFlowWorkflow, flowConnConfig, nil) @@ -163,7 +166,7 @@ func (s EventhubsSuite) Test_EH_Simple() { ehCreds.Namespace, s.GetEventhubName(), 1, - flowConnConfig.Destination.GetEventhubGroupConfig(), + destinationPeer.GetEventhubGroupConfig(), ) if err != nil { return false diff --git a/flow/e2e/generic/generic_test.go b/flow/e2e/generic/generic_test.go index 4f5bd02bb5..f07002a1f1 100644 --- 
a/flow/e2e/generic/generic_test.go +++ b/flow/e2e/generic/generic_test.go @@ -60,9 +60,9 @@ func (s Generic) Test_Simple_Flow() { connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: e2e.AddSuffix(s, "test_simple"), TableMappings: e2e.TableMappings(s, srcTable, dstTable), - Destination: s.Peer(), + Destination: s.Peer().Name, } - flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs(t) tc := e2e.NewTemporalClient(t) env := e2e.ExecutePeerflow(tc, peerflow.CDCFlowWorkflow, flowConnConfig, nil) @@ -108,10 +108,10 @@ func (s Generic) Test_Simple_Schema_Changes() { connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: e2e.AddSuffix(s, srcTable), TableMappings: e2e.TableMappings(s, srcTable, dstTable), - Destination: s.Peer(), + Destination: s.Peer().Name, } - flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs(t) // wait for PeerFlowStatusQuery to finish setup // and then insert and mutate schema repeatedly. diff --git a/flow/e2e/kafka/kafka_test.go b/flow/e2e/kafka/kafka_test.go index 987528d2d0..33affa9230 100644 --- a/flow/e2e/kafka/kafka_test.go +++ b/flow/e2e/kafka/kafka_test.go @@ -42,7 +42,7 @@ func (s KafkaSuite) Suffix() string { } func (s KafkaSuite) Peer() *protos.Peer { - return &protos.Peer{ + ret := &protos.Peer{ Name: e2e.AddSuffix(s, "kafka"), Type: protos.DBType_KAFKA, Config: &protos.Peer_KafkaConfig{ @@ -52,6 +52,8 @@ func (s KafkaSuite) Peer() *protos.Peer { }, }, } + e2e.CreatePeer(s.t, ret) + return ret } func (s KafkaSuite) DestinationTable(table string) string { @@ -99,9 +101,9 @@ func (s KafkaSuite) TestSimple() { connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: flowName, TableNameMapping: map[string]string{srcTableName: flowName}, - Destination: s.Peer(), + Destination: s.Peer().Name, } - flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs(s.t) flowConnConfig.Script = "e2e_kasimple" tc := e2e.NewTemporalClient(s.t) @@ -158,9 +160,9 @@ func (s KafkaSuite) TestMessage() { connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: flowName, TableNameMapping: map[string]string{srcTableName: flowName}, - Destination: s.Peer(), + Destination: s.Peer().Name, } - flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs(s.t) flowConnConfig.Script = "e2e_kamessage" tc := e2e.NewTemporalClient(s.t) @@ -210,9 +212,9 @@ func (s KafkaSuite) TestDefault() { connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: flowName, TableNameMapping: map[string]string{srcTableName: flowName}, - Destination: s.Peer(), + Destination: s.Peer().Name, } - flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs(s.t) tc := e2e.NewTemporalClient(s.t) env := e2e.ExecutePeerflow(tc, peerflow.CDCFlowWorkflow, flowConnConfig, nil) @@ -265,9 +267,9 @@ func (s KafkaSuite) TestInitialLoad() { connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: flowName, TableNameMapping: map[string]string{srcTableName: flowName}, - Destination: s.Peer(), + Destination: s.Peer().Name, } - flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs(s.t) flowConnConfig.DoInitialSnapshot = true _, err = 
s.Conn().Exec(context.Background(), fmt.Sprintf(` diff --git a/flow/e2e/postgres/peer_flow_pg_test.go b/flow/e2e/postgres/peer_flow_pg_test.go index ec96420480..eac4c798c9 100644 --- a/flow/e2e/postgres/peer_flow_pg_test.go +++ b/flow/e2e/postgres/peer_flow_pg_test.go @@ -13,11 +13,9 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - "github.com/PeerDB-io/peer-flow/connectors/utils" "github.com/PeerDB-io/peer-flow/e2e" "github.com/PeerDB-io/peer-flow/generated/protos" "github.com/PeerDB-io/peer-flow/model" - "github.com/PeerDB-io/peer-flow/peerdbenv" "github.com/PeerDB-io/peer-flow/shared" peerflow "github.com/PeerDB-io/peer-flow/workflows" ) @@ -67,10 +65,10 @@ func (s PeerFlowE2ETestSuitePG) Test_Geospatial_PG() { connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: s.attachSuffix("test_geo_flow_pg"), TableNameMapping: map[string]string{srcTableName: dstTableName}, - Destination: s.peer, + Destination: s.Peer().Name, } - flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs(s.t) flowConnConfig.MaxBatchSize = 100 tc := e2e.NewTemporalClient(s.t) @@ -112,10 +110,10 @@ func (s PeerFlowE2ETestSuitePG) Test_Types_PG() { connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: s.attachSuffix("test_types_pg"), TableNameMapping: map[string]string{srcTableName: dstTableName}, - Destination: s.peer, + Destination: s.Peer().Name, } - flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs(s.t) flowConnConfig.MaxBatchSize = 100 flowConnConfig.SoftDelete = false flowConnConfig.SoftDeleteColName = "" @@ -179,10 +177,10 @@ func (s PeerFlowE2ETestSuitePG) Test_Enums_PG() { connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: s.attachSuffix("test_enum_flow"), TableNameMapping: map[string]string{srcTableName: dstTableName}, - Destination: s.peer, + Destination: s.Peer().Name, } - flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs(s.t) flowConnConfig.MaxBatchSize = 100 env := e2e.ExecutePeerflow(tc, peerflow.CDCFlowWorkflow, flowConnConfig, nil) @@ -221,10 +219,10 @@ func (s PeerFlowE2ETestSuitePG) Test_Composite_PKey_PG() { connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: s.attachSuffix("test_cpkey_flow"), TableNameMapping: map[string]string{srcTableName: dstTableName}, - Destination: s.peer, + Destination: s.Peer().Name, } - flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs(s.t) flowConnConfig.MaxBatchSize = 100 // wait for PeerFlowStatusQuery to finish setup @@ -283,10 +281,10 @@ func (s PeerFlowE2ETestSuitePG) Test_Composite_PKey_Toast_1_PG() { connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: s.attachSuffix("test_cpkey_toast1_flow"), TableNameMapping: map[string]string{srcTableName: dstTableName}, - Destination: s.peer, + Destination: s.Peer().Name, } - flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs(s.t) flowConnConfig.MaxBatchSize = 100 // wait for PeerFlowStatusQuery to finish setup @@ -348,10 +346,10 @@ func (s PeerFlowE2ETestSuitePG) Test_Composite_PKey_Toast_2_PG() { connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: s.attachSuffix("test_cpkey_toast2_flow"), TableNameMapping: 
map[string]string{srcTableName: dstTableName}, - Destination: s.peer, + Destination: s.Peer().Name, } - flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs(s.t) flowConnConfig.MaxBatchSize = 100 // wait for PeerFlowStatusQuery to finish setup @@ -405,11 +403,11 @@ func (s PeerFlowE2ETestSuitePG) Test_PeerDB_Columns() { connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: s.attachSuffix("test_peerdb_cols_mirror"), TableNameMapping: map[string]string{srcTableName: dstTableName}, - Destination: s.peer, + Destination: s.Peer().Name, SoftDelete: true, } - flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs(s.t) flowConnConfig.MaxBatchSize = 100 env := e2e.ExecutePeerflow(tc, peerflow.CDCFlowWorkflow, flowConnConfig, nil) @@ -455,15 +453,15 @@ func (s PeerFlowE2ETestSuitePG) Test_Soft_Delete_Basic() { require.NoError(s.t, err) config := &protos.FlowConnectionConfigs{ - FlowJobName: s.attachSuffix("test_softdel"), - Destination: s.peer, + FlowJobName: s.attachSuffix("test_softdel"), + DestinationName: s.Peer().Name, TableMappings: []*protos.TableMapping{ { SourceTableIdentifier: srcTableName, DestinationTableIdentifier: dstTableName, }, }, - Source: e2e.GeneratePostgresPeer(), + SourceName: e2e.GeneratePostgresPeer(s.t).Name, SoftDelete: true, SoftDeleteColName: "_PEERDB_IS_DELETED", SyncedAtColName: "_PEERDB_SYNCED_AT", @@ -533,15 +531,15 @@ func (s PeerFlowE2ETestSuitePG) Test_Soft_Delete_IUD_Same_Batch() { require.NoError(s.t, err) config := &protos.FlowConnectionConfigs{ - FlowJobName: s.attachSuffix("test_softdel_iud"), - Destination: s.peer, + FlowJobName: s.attachSuffix("test_softdel_iud"), + DestinationName: s.Peer().Name, TableMappings: []*protos.TableMapping{ { SourceTableIdentifier: srcTableName, DestinationTableIdentifier: dstTableName, }, }, - Source: e2e.GeneratePostgresPeer(), + SourceName: e2e.GeneratePostgresPeer(s.t).Name, SoftDelete: true, SoftDeleteColName: "_PEERDB_IS_DELETED", SyncedAtColName: "_PEERDB_SYNCED_AT", @@ -602,15 +600,15 @@ func (s PeerFlowE2ETestSuitePG) Test_Soft_Delete_UD_Same_Batch() { require.NoError(s.t, err) config := &protos.FlowConnectionConfigs{ - FlowJobName: s.attachSuffix("test_softdel_ud"), - Destination: s.peer, + FlowJobName: s.attachSuffix("test_softdel_ud"), + DestinationName: s.Peer().Name, TableMappings: []*protos.TableMapping{ { SourceTableIdentifier: srcTableName, DestinationTableIdentifier: dstTableName, }, }, - Source: e2e.GeneratePostgresPeer(), + SourceName: e2e.GeneratePostgresPeer(s.t).Name, SoftDelete: true, SoftDeleteColName: "_PEERDB_IS_DELETED", SyncedAtColName: "_PEERDB_SYNCED_AT", @@ -679,15 +677,15 @@ func (s PeerFlowE2ETestSuitePG) Test_Soft_Delete_Insert_After_Delete() { require.NoError(s.t, err) config := &protos.FlowConnectionConfigs{ - FlowJobName: s.attachSuffix("test_softdel_iad"), - Destination: s.peer, + FlowJobName: s.attachSuffix("test_softdel_iad"), + DestinationName: s.Peer().Name, TableMappings: []*protos.TableMapping{ { SourceTableIdentifier: srcTableName, DestinationTableIdentifier: dstTableName, }, }, - Source: e2e.GeneratePostgresPeer(), + SourceName: e2e.GeneratePostgresPeer(s.t).Name, SoftDelete: true, SoftDeleteColName: "_PEERDB_IS_DELETED", SyncedAtColName: "_PEERDB_SYNCED_AT", @@ -748,15 +746,15 @@ func (s PeerFlowE2ETestSuitePG) Test_Supported_Mixed_Case_Table() { require.NoError(s.t, err) config := &protos.FlowConnectionConfigs{ - 
FlowJobName: s.attachSuffix("test_mixed_case"), - Destination: s.peer, + FlowJobName: s.attachSuffix("test_mixed_case"), + DestinationName: s.Peer().Name, TableMappings: []*protos.TableMapping{ { SourceTableIdentifier: srcTableName, DestinationTableIdentifier: dstTableName, }, }, - Source: e2e.GeneratePostgresPeer(), + SourceName: e2e.GeneratePostgresPeer(s.t).Name, MaxBatchSize: 100, } @@ -800,9 +798,9 @@ func (s PeerFlowE2ETestSuitePG) Test_Multiple_Parallel_Initial() { DoInitialSnapshot: true, InitialSnapshotOnly: true, FlowJobName: s.attachSuffix("test_multi_init"), - Destination: s.peer, + DestinationName: s.Peer().Name, TableMappings: tableMapping, - Source: e2e.GeneratePostgresPeer(), + SourceName: e2e.GeneratePostgresPeer(s.t).Name, CdcStagingPath: "", SnapshotMaxParallelWorkers: 4, SnapshotNumTablesInParallel: 3, @@ -833,10 +831,10 @@ func (s PeerFlowE2ETestSuitePG) Test_ContinueAsNew() { connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: s.attachSuffix("test_continueasnew_flow"), TableNameMapping: map[string]string{srcTableName: dstTableName}, - Destination: s.peer, + Destination: s.Peer().Name, } - flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs(s.t) flowConnConfig.MaxBatchSize = 2 flowConnConfig.IdleTimeoutSeconds = 10 @@ -878,27 +876,16 @@ func (s PeerFlowE2ETestSuitePG) Test_Dynamic_Mirror_Config_Via_Signals() { `, srcTable1Name, srcTable2Name)) require.NoError(s.t, err) - sourcePeer := e2e.GeneratePostgresPeer() - - conn, err := peerdbenv.GetCatalogConnectionPoolFromEnv(context.Background()) - require.NoError(s.t, err) - - _, err = utils.CreatePeerNoValidate(context.Background(), conn, sourcePeer) - require.NoError(s.t, err) - - _, err = utils.CreatePeerNoValidate(context.Background(), conn, s.peer) - require.NoError(s.t, err) - config := &protos.FlowConnectionConfigs{ - FlowJobName: s.attachSuffix("test_dynconfig"), - Destination: s.peer, + FlowJobName: s.attachSuffix("test_dynconfig"), + DestinationName: s.Peer().Name, TableMappings: []*protos.TableMapping{ { SourceTableIdentifier: srcTable1Name, DestinationTableIdentifier: dstTable1Name, }, }, - Source: sourcePeer, + SourceName: s.Peer().Name, MaxBatchSize: 6, IdleTimeoutSeconds: 7, DoInitialSnapshot: true, @@ -1036,9 +1023,9 @@ func (s PeerFlowE2ETestSuitePG) Test_TypeSystem_PG() { connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: s.attachSuffix("test_typesystem_pg"), TableNameMapping: map[string]string{srcTableName: dstTableName}, - Destination: s.peer, + Destination: s.Peer().Name, } - flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs(s.t) flowConnConfig.DoInitialSnapshot = true flowConnConfig.System = protos.TypeSystem_PG flowConnConfig.SoftDelete = false @@ -1086,9 +1073,9 @@ func (s PeerFlowE2ETestSuitePG) Test_TransformRecordScript() { connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: s.attachSuffix("test_transrecord_pg"), TableNameMapping: map[string]string{srcTableName: dstTableName}, - Destination: s.peer, + Destination: s.Peer().Name, } - flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs(s.t) flowConnConfig.Script = "cdc_transform_record" tc := e2e.NewTemporalClient(s.t) @@ -1135,9 +1122,9 @@ func (s PeerFlowE2ETestSuitePG) Test_TransformRowScript() { connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: 
s.attachSuffix("test_transrow_pg"), TableNameMapping: map[string]string{srcTableName: dstTableName}, - Destination: s.peer, + Destination: s.Peer().Name, } - flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs(s.t) flowConnConfig.Script = "cdc_transform_row" tc := e2e.NewTemporalClient(s.t) diff --git a/flow/e2e/postgres/postgres.go b/flow/e2e/postgres/postgres.go index 20b99fef68..37a0ace06b 100644 --- a/flow/e2e/postgres/postgres.go +++ b/flow/e2e/postgres/postgres.go @@ -21,7 +21,6 @@ type PeerFlowE2ETestSuitePG struct { t *testing.T conn *connpostgres.PostgresConnector - peer *protos.Peer suffix string } @@ -46,7 +45,7 @@ func (s PeerFlowE2ETestSuitePG) Suffix() string { } func (s PeerFlowE2ETestSuitePG) Peer() *protos.Peer { - return s.peer + return e2e.GeneratePostgresPeer(s.t) } func (s PeerFlowE2ETestSuitePG) DestinationTable(table string) string { @@ -73,7 +72,6 @@ func SetupSuite(t *testing.T) PeerFlowE2ETestSuitePG { return PeerFlowE2ETestSuitePG{ t: t, conn: conn, - peer: e2e.GeneratePostgresPeer(), suffix: suffix, } } diff --git a/flow/e2e/postgres/qrep_flow_pg_test.go b/flow/e2e/postgres/qrep_flow_pg_test.go index eb67b43592..b170729fe7 100644 --- a/flow/e2e/postgres/qrep_flow_pg_test.go +++ b/flow/e2e/postgres/qrep_flow_pg_test.go @@ -196,20 +196,18 @@ func (s PeerFlowE2ETestSuitePG) Test_Complete_QRep_Flow_Multi_Insert_PG() { query := fmt.Sprintf("SELECT * FROM e2e_test_%s.%s WHERE updated_at BETWEEN {{.start}} AND {{.end}}", s.suffix, srcTable) - postgresPeer := e2e.GeneratePostgresPeer() - - qrepConfig, err := e2e.CreateQRepWorkflowConfig( + qrepConfig := e2e.CreateQRepWorkflowConfig( + s.t, "test_qrep_flow_avro_pg", srcSchemaQualified, dstSchemaQualified, query, - postgresPeer, + e2e.GeneratePostgresPeer(s.t).Name, "", true, "", "", ) - require.NoError(s.t, err) tc := e2e.NewTemporalClient(s.t) env := e2e.RunQRepFlowWorkflow(tc, qrepConfig) @@ -237,20 +235,18 @@ func (s PeerFlowE2ETestSuitePG) Test_PG_TypeSystemQRep() { query := fmt.Sprintf("SELECT * FROM e2e_test_%s.%s WHERE updated_at BETWEEN {{.start}} AND {{.end}}", s.suffix, srcTable) - postgresPeer := e2e.GeneratePostgresPeer() - - qrepConfig, err := e2e.CreateQRepWorkflowConfig( + qrepConfig := e2e.CreateQRepWorkflowConfig( + s.t, "test_qrep_flow_pgpg", srcSchemaQualified, dstSchemaQualified, query, - postgresPeer, + e2e.GeneratePostgresPeer(s.t).Name, "", true, "", "", ) - require.NoError(s.t, err) qrepConfig.System = protos.TypeSystem_PG tc := e2e.NewTemporalClient(s.t) @@ -276,28 +272,25 @@ func (s PeerFlowE2ETestSuitePG) Test_PeerDB_Columns_QRep_PG() { query := fmt.Sprintf("SELECT * FROM e2e_test_%s.%s WHERE updated_at BETWEEN {{.start}} AND {{.end}}", s.suffix, srcTable) - postgresPeer := e2e.GeneratePostgresPeer() - - qrepConfig, err := e2e.CreateQRepWorkflowConfig( + qrepConfig := e2e.CreateQRepWorkflowConfig( + s.t, "test_qrep_columns_pg", srcSchemaQualified, dstSchemaQualified, query, - postgresPeer, + e2e.GeneratePostgresPeer(s.t).Name, "", true, "_PEERDB_SYNCED_AT", "", ) - require.NoError(s.t, err) tc := e2e.NewTemporalClient(s.t) env := e2e.RunQRepFlowWorkflow(tc, qrepConfig) e2e.EnvWaitForFinished(s.t, env, 3*time.Minute) require.NoError(s.t, env.Error()) - err = s.checkSyncedAt(dstSchemaQualified) - require.NoError(s.t, err) + require.NoError(s.t, s.checkSyncedAt(dstSchemaQualified)) } func (s PeerFlowE2ETestSuitePG) Test_Overwrite_PG() { @@ -314,20 +307,18 @@ func (s PeerFlowE2ETestSuitePG) Test_Overwrite_PG() { query := 
fmt.Sprintf("SELECT * FROM e2e_test_%s.%s WHERE updated_at BETWEEN {{.start}} AND {{.end}}", s.suffix, srcTable) - postgresPeer := e2e.GeneratePostgresPeer() - - qrepConfig, err := e2e.CreateQRepWorkflowConfig( + qrepConfig := e2e.CreateQRepWorkflowConfig( + s.t, "test_overwrite_pg", srcSchemaQualified, dstSchemaQualified, query, - postgresPeer, + e2e.GeneratePostgresPeer(s.t).Name, "", true, "_PEERDB_SYNCED_AT", "", ) - require.NoError(s.t, err) qrepConfig.WriteMode = &protos.QRepWriteMode{ WriteType: protos.QRepWriteType_QREP_WRITE_MODE_OVERWRITE, } @@ -336,7 +327,7 @@ func (s PeerFlowE2ETestSuitePG) Test_Overwrite_PG() { tc := e2e.NewTemporalClient(s.t) env := e2e.RunQRepFlowWorkflow(tc, qrepConfig) e2e.EnvWaitFor(s.t, env, 3*time.Minute, "waiting for first sync to complete", func() bool { - err = s.compareCounts(dstSchemaQualified, int64(numRows)) + err := s.compareCounts(dstSchemaQualified, int64(numRows)) return err == nil }) @@ -344,7 +335,7 @@ func (s PeerFlowE2ETestSuitePG) Test_Overwrite_PG() { s.populateSourceTable(srcTable, newRowCount) e2e.EnvWaitFor(s.t, env, 2*time.Minute, "waiting for overwrite sync to complete", func() bool { - err = s.compareCounts(dstSchemaQualified, int64(newRowCount)) + err := s.compareCounts(dstSchemaQualified, int64(newRowCount)) return err == nil }) @@ -365,20 +356,18 @@ func (s PeerFlowE2ETestSuitePG) Test_No_Rows_QRep_PG() { query := fmt.Sprintf("SELECT * FROM e2e_test_%s.%s WHERE updated_at BETWEEN {{.start}} AND {{.end}}", s.suffix, srcTable) - postgresPeer := e2e.GeneratePostgresPeer() - - qrepConfig, err := e2e.CreateQRepWorkflowConfig( + qrepConfig := e2e.CreateQRepWorkflowConfig( + s.t, "test_no_rows_qrep_pg", srcSchemaQualified, dstSchemaQualified, query, - postgresPeer, + e2e.GeneratePostgresPeer(s.t).Name, "", true, "_PEERDB_SYNCED_AT", "", ) - require.NoError(s.t, err) tc := e2e.NewTemporalClient(s.t) env := e2e.RunQRepFlowWorkflow(tc, qrepConfig) @@ -400,18 +389,18 @@ func (s PeerFlowE2ETestSuitePG) Test_Pause() { query := fmt.Sprintf("SELECT * FROM e2e_test_%s.%s WHERE updated_at BETWEEN {{.start}} AND {{.end}}", s.suffix, srcTable) - config, err := e2e.CreateQRepWorkflowConfig( + config := e2e.CreateQRepWorkflowConfig( + s.t, "test_qrep_pause_pg", srcSchemaQualified, dstSchemaQualified, query, - e2e.GeneratePostgresPeer(), + e2e.GeneratePostgresPeer(s.t).Name, "", true, "_PEERDB_SYNCED_AT", "", ) - require.NoError(s.t, err) config.InitialCopyOnly = false tc := e2e.NewTemporalClient(s.t) @@ -462,24 +451,22 @@ func (s PeerFlowE2ETestSuitePG) TestTransform() { query := fmt.Sprintf("SELECT * FROM %s WHERE updated_at BETWEEN {{.start}} AND {{.end}}", srcSchemaQualified) - postgresPeer := e2e.GeneratePostgresPeer() - _, err := s.Conn().Exec(context.Background(), `insert into public.scripts (name, lang, source) values ('pgtransform', 'lua', 'function transformRow(row) row.myreal = 1729 end') on conflict do nothing`) require.NoError(s.t, err) - qrepConfig, err := e2e.CreateQRepWorkflowConfig( + qrepConfig := e2e.CreateQRepWorkflowConfig( + s.t, "test_transform", srcSchemaQualified, dstSchemaQualified, query, - postgresPeer, + e2e.GeneratePostgresPeer(s.t).Name, "", true, "_PEERDB_SYNCED_AT", "", ) - require.NoError(s.t, err) qrepConfig.WriteMode = &protos.QRepWriteMode{ WriteType: protos.QRepWriteType_QREP_WRITE_MODE_OVERWRITE, } diff --git a/flow/e2e/pubsub/pubsub_test.go b/flow/e2e/pubsub/pubsub_test.go index 66393c6727..d4920553d8 100644 --- a/flow/e2e/pubsub/pubsub_test.go +++ b/flow/e2e/pubsub/pubsub_test.go @@ -67,7 +67,7 @@ func 
ServiceAccount() (*utils.GcpServiceAccount, error) { } func (s PubSubSuite) Peer(sa *utils.GcpServiceAccount) *protos.Peer { - return &protos.Peer{ + ret := &protos.Peer{ Name: e2e.AddSuffix(s, "pubsub"), Type: protos.DBType_PUBSUB, Config: &protos.Peer_PubsubConfig{ @@ -87,6 +87,8 @@ func (s PubSubSuite) Peer(sa *utils.GcpServiceAccount) *protos.Peer { }, }, } + e2e.CreatePeer(s.t, ret) + return ret } func (s PubSubSuite) DestinationTable(table string) string { @@ -137,9 +139,9 @@ func (s PubSubSuite) TestCreateTopic() { connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: flowName, TableNameMapping: map[string]string{srcTableName: flowName}, - Destination: s.Peer(sa), + Destination: s.Peer(sa).Name, } - flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs(s.t) flowConnConfig.Script = "e2e_pscreate" tc := e2e.NewTemporalClient(s.t) @@ -189,9 +191,9 @@ func (s PubSubSuite) TestSimple() { connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: flowName, TableNameMapping: map[string]string{srcTableName: flowName}, - Destination: s.Peer(sa), + Destination: s.Peer(sa).Name, } - flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs(s.t) flowConnConfig.Script = "e2e_pssimple" psclient, err := sa.CreatePubSubClient(context.Background()) @@ -259,9 +261,9 @@ func (s PubSubSuite) TestInitialLoad() { connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: flowName, TableNameMapping: map[string]string{srcTableName: flowName}, - Destination: s.Peer(sa), + Destination: s.Peer(sa).Name, } - flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs(s.t) flowConnConfig.Script = "e2e_psinitial" flowConnConfig.DoInitialSnapshot = true diff --git a/flow/e2e/s3/cdc_s3_test.go b/flow/e2e/s3/cdc_s3_test.go index 65650a3c48..c954d02b50 100644 --- a/flow/e2e/s3/cdc_s3_test.go +++ b/flow/e2e/s3/cdc_s3_test.go @@ -36,10 +36,10 @@ func (s PeerFlowE2ETestSuiteS3) Test_Complete_Simple_Flow_S3() { connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: flowJobName, TableNameMapping: map[string]string{srcTableName: dstTableName}, - Destination: s.s3Helper.GetPeer(), + Destination: s.Peer().Name, } - flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs(s.t) flowConnConfig.MaxBatchSize = 5 env := e2e.ExecutePeerflow(tc, peerflow.CDCFlowWorkflow, flowConnConfig, nil) diff --git a/flow/e2e/s3/qrep_flow_s3_test.go b/flow/e2e/s3/qrep_flow_s3_test.go index 30b466c4a4..0b40a045da 100644 --- a/flow/e2e/s3/qrep_flow_s3_test.go +++ b/flow/e2e/s3/qrep_flow_s3_test.go @@ -12,6 +12,7 @@ import ( connpostgres "github.com/PeerDB-io/peer-flow/connectors/postgres" "github.com/PeerDB-io/peer-flow/e2e" "github.com/PeerDB-io/peer-flow/e2eshared" + "github.com/PeerDB-io/peer-flow/generated/protos" "github.com/PeerDB-io/peer-flow/shared" ) @@ -35,6 +36,19 @@ func (s PeerFlowE2ETestSuiteS3) Suffix() string { return s.suffix } +func (s PeerFlowE2ETestSuiteS3) Peer() *protos.Peer { + s.t.Helper() + ret := &protos.Peer{ + Name: e2e.AddSuffix(s, "s3peer"), + Type: protos.DBType_S3, + Config: &protos.Peer_S3Config{ + S3Config: s.s3Helper.s3Config, + }, + } + e2e.CreatePeer(s.t, ret) + return ret +} + func TestPeerFlowE2ETestSuiteS3(t *testing.T) { e2eshared.RunSuite(t, SetupSuiteS3) } @@ -104,18 +118,18 @@ func (s 
PeerFlowE2ETestSuiteS3) Test_Complete_QRep_Flow_S3() { s.setupSourceTable(jobName, 10) query := fmt.Sprintf("SELECT * FROM %s WHERE updated_at >= {{.start}} AND updated_at < {{.end}}", schemaQualifiedName) - qrepConfig, err := e2e.CreateQRepWorkflowConfig( + qrepConfig := e2e.CreateQRepWorkflowConfig( + s.t, jobName, schemaQualifiedName, "e2e_dest_1", query, - s.s3Helper.GetPeer(), + s.Peer().Name, "stage", false, "", "", ) - require.NoError(s.t, err) qrepConfig.StagingPath = s.s3Helper.s3Config.Url env := e2e.RunQRepFlowWorkflow(tc, qrepConfig) @@ -146,18 +160,18 @@ func (s PeerFlowE2ETestSuiteS3) Test_Complete_QRep_Flow_S3_CTID() { s.setupSourceTable(jobName, 20000) query := fmt.Sprintf("SELECT * FROM %s WHERE ctid BETWEEN {{.start}} AND {{.end}}", schemaQualifiedName) - qrepConfig, err := e2e.CreateQRepWorkflowConfig( + qrepConfig := e2e.CreateQRepWorkflowConfig( + s.t, jobName, schemaQualifiedName, "e2e_dest_ctid", query, - s.s3Helper.GetPeer(), + s.Peer().Name, "stage", false, "", "", ) - require.NoError(s.t, err) qrepConfig.StagingPath = s.s3Helper.s3Config.Url qrepConfig.NumRowsPerPartition = 2000 qrepConfig.InitialCopyOnly = true diff --git a/flow/e2e/s3/s3_helper.go b/flow/e2e/s3/s3_helper.go index 1c5cc3f377..b478cf2b14 100644 --- a/flow/e2e/s3/s3_helper.go +++ b/flow/e2e/s3/s3_helper.go @@ -17,10 +17,6 @@ import ( "github.com/PeerDB-io/peer-flow/shared" ) -const ( - peerName string = "test_s3_peer" -) - type S3TestHelper struct { client *s3.Client s3Config *protos.S3Config @@ -81,16 +77,6 @@ func NewS3TestHelper(switchToGCS bool) (*S3TestHelper, error) { }, nil } -func (h *S3TestHelper) GetPeer() *protos.Peer { - return &protos.Peer{ - Name: peerName, - Type: protos.DBType_S3, - Config: &protos.Peer_S3Config{ - S3Config: h.s3Config, - }, - } -} - // List all files from the S3 bucket. // returns as a list of S3Objects. 
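// A note on the e2e.CreatePeer helper that the new Peer() methods above rely on:
// its body is not part of this diff. A minimal sketch, assuming it simply wraps
// the catalog calls the removed test code previously made inline (the name and
// error handling below are assumptions, not taken from this patch):
//
//	func CreatePeer(t *testing.T, peer *protos.Peer) {
//		t.Helper()
//		conn, err := peerdbenv.GetCatalogConnectionPoolFromEnv(context.Background())
//		require.NoError(t, err)
//		_, err = utils.CreatePeerNoValidate(context.Background(), conn, peer)
//		require.NoError(t, err)
//	}
//
// Registering the peer row up front is what allows flow and QRep configs to carry
// only peer names (SourceName/DestinationName) rather than full *protos.Peer objects.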
func (h *S3TestHelper) ListAllFiles( diff --git a/flow/e2e/snowflake/peer_flow_sf_test.go b/flow/e2e/snowflake/peer_flow_sf_test.go index bb3bff544d..a47944e86e 100644 --- a/flow/e2e/snowflake/peer_flow_sf_test.go +++ b/flow/e2e/snowflake/peer_flow_sf_test.go @@ -52,10 +52,10 @@ func (s PeerFlowE2ETestSuiteSF) Test_Flow_ReplicaIdentity_Index_No_Pkey() { connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: s.attachSuffix(tableName), TableNameMapping: map[string]string{srcTableName: dstTableName}, - Destination: s.sfHelper.Peer, + Destination: s.Peer().Name, } - flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs(s.t) flowConnConfig.MaxBatchSize = 100 // wait for PeerFlowStatusQuery to finish setup @@ -105,10 +105,10 @@ func (s PeerFlowE2ETestSuiteSF) Test_Invalid_Numeric() { connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: s.attachSuffix(tableName), TableNameMapping: map[string]string{srcTableName: dstTableName}, - Destination: s.sfHelper.Peer, + Destination: s.Peer().Name, } - flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs(s.t) flowConnConfig.DoInitialSnapshot = true tc := e2e.NewTemporalClient(s.t) @@ -159,10 +159,10 @@ func (s PeerFlowE2ETestSuiteSF) Test_Invalid_Geo_SF_Avro_CDC() { connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: s.attachSuffix(tableName), TableNameMapping: map[string]string{srcTableName: dstTableName}, - Destination: s.sfHelper.Peer, + Destination: s.Peer().Name, } - flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs(s.t) // wait for PeerFlowStatusQuery to finish setup // and then insert 10 rows into the source table @@ -249,10 +249,10 @@ func (s PeerFlowE2ETestSuiteSF) Test_Toast_SF() { connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: s.attachSuffix("test_toast_sf_1"), TableNameMapping: map[string]string{srcTableName: dstTableName}, - Destination: s.sfHelper.Peer, + Destination: s.Peer().Name, } - flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs(s.t) flowConnConfig.MaxBatchSize = 100 // wait for PeerFlowStatusQuery to finish setup @@ -300,10 +300,10 @@ func (s PeerFlowE2ETestSuiteSF) Test_Toast_Advance_1_SF() { connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: s.attachSuffix("test_toast_sf_3"), TableNameMapping: map[string]string{srcTableName: dstTableName}, - Destination: s.sfHelper.Peer, + Destination: s.Peer().Name, } - flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs(s.t) flowConnConfig.MaxBatchSize = 100 // wait for PeerFlowStatusQuery to finish setup @@ -356,10 +356,10 @@ func (s PeerFlowE2ETestSuiteSF) Test_Toast_Advance_2_SF() { connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: s.attachSuffix("test_toast_sf_4"), TableNameMapping: map[string]string{srcTableName: dstTableName}, - Destination: s.sfHelper.Peer, + Destination: s.Peer().Name, } - flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs(s.t) flowConnConfig.MaxBatchSize = 100 // wait for PeerFlowStatusQuery to finish setup @@ -407,10 +407,10 @@ func (s PeerFlowE2ETestSuiteSF) Test_Toast_Advance_3_SF() { connectionGen := 
e2e.FlowConnectionGenerationConfig{ FlowJobName: s.attachSuffix("test_toast_sf_5"), TableNameMapping: map[string]string{srcTableName: dstTableName}, - Destination: s.sfHelper.Peer, + Destination: s.Peer().Name, } - flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs(s.t) flowConnConfig.MaxBatchSize = 100 // wait for PeerFlowStatusQuery to finish setup @@ -465,10 +465,10 @@ func (s PeerFlowE2ETestSuiteSF) Test_Types_SF() { connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: s.attachSuffix("test_types_sf"), TableNameMapping: map[string]string{srcTableName: dstTableName}, - Destination: s.sfHelper.Peer, + Destination: s.Peer().Name, } - flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs(s.t) flowConnConfig.MaxBatchSize = 100 // wait for PeerFlowStatusQuery to finish setup @@ -560,10 +560,10 @@ func (s PeerFlowE2ETestSuiteSF) Test_Multi_Table_SF() { connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: s.attachSuffix("test_multi_table"), TableNameMapping: map[string]string{srcTable1Name: dstTable1Name, srcTable2Name: dstTable2Name}, - Destination: s.sfHelper.Peer, + Destination: s.Peer().Name, } - flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs(s.t) flowConnConfig.MaxBatchSize = 100 // wait for PeerFlowStatusQuery to finish setup @@ -615,10 +615,10 @@ func (s PeerFlowE2ETestSuiteSF) Test_Composite_PKey_SF() { connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: s.attachSuffix("test_cpkey_flow"), TableNameMapping: map[string]string{srcTableName: dstTableName}, - Destination: s.sfHelper.Peer, + Destination: s.Peer().Name, } - flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs(s.t) flowConnConfig.MaxBatchSize = 100 // wait for PeerFlowStatusQuery to finish setup @@ -670,10 +670,10 @@ func (s PeerFlowE2ETestSuiteSF) Test_Composite_PKey_Toast_1_SF() { connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: s.attachSuffix("test_cpkey_toast1_flow"), TableNameMapping: map[string]string{srcTableName: dstTableName}, - Destination: s.sfHelper.Peer, + Destination: s.Peer().Name, } - flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs(s.t) flowConnConfig.MaxBatchSize = 100 flowConnConfig.SoftDelete = false flowConnConfig.SoftDeleteColName = "" @@ -733,10 +733,10 @@ func (s PeerFlowE2ETestSuiteSF) Test_Composite_PKey_Toast_2_SF() { connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: s.attachSuffix(tableName), TableNameMapping: map[string]string{srcTableName: dstTableName}, - Destination: s.sfHelper.Peer, + Destination: s.Peer().Name, } - flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs(s.t) flowConnConfig.MaxBatchSize = 100 // wait for PeerFlowStatusQuery to finish setup @@ -787,8 +787,8 @@ func (s PeerFlowE2ETestSuiteSF) Test_Column_Exclusion() { require.NoError(s.t, err) config := &protos.FlowConnectionConfigs{ - FlowJobName: s.attachSuffix(tableName), - Destination: s.sfHelper.Peer, + FlowJobName: s.attachSuffix(tableName), + DestinationName: s.Peer().Name, TableMappings: []*protos.TableMapping{ { SourceTableIdentifier: srcTableName, @@ -796,7 +796,7 @@ func (s PeerFlowE2ETestSuiteSF) 
Test_Column_Exclusion() { Exclude: []string{"c2"}, }, }, - Source: e2e.GeneratePostgresPeer(), + SourceName: e2e.GeneratePostgresPeer(s.t).Name, SyncedAtColName: "_PEERDB_SYNCED_AT", MaxBatchSize: 100, } @@ -856,15 +856,15 @@ func (s PeerFlowE2ETestSuiteSF) Test_Soft_Delete_Basic() { require.NoError(s.t, err) config := &protos.FlowConnectionConfigs{ - FlowJobName: s.attachSuffix(dstName), - Destination: s.sfHelper.Peer, + FlowJobName: s.attachSuffix(dstName), + DestinationName: s.Peer().Name, TableMappings: []*protos.TableMapping{ { SourceTableIdentifier: srcTableName, DestinationTableIdentifier: dstTableName, }, }, - Source: e2e.GeneratePostgresPeer(), + SourceName: e2e.GeneratePostgresPeer(s.t).Name, SoftDelete: true, SoftDeleteColName: "_PEERDB_IS_DELETED", SyncedAtColName: "_PEERDB_SYNCED_AT", @@ -925,15 +925,15 @@ func (s PeerFlowE2ETestSuiteSF) Test_Soft_Delete_IUD_Same_Batch() { require.NoError(s.t, err) config := &protos.FlowConnectionConfigs{ - FlowJobName: s.attachSuffix("test_softdel_iud"), - Destination: s.sfHelper.Peer, + FlowJobName: s.attachSuffix("test_softdel_iud"), + DestinationName: s.Peer().Name, TableMappings: []*protos.TableMapping{ { SourceTableIdentifier: srcTableName, DestinationTableIdentifier: dstTableName, }, }, - Source: e2e.GeneratePostgresPeer(), + SourceName: e2e.GeneratePostgresPeer(s.t).Name, SoftDelete: true, SoftDeleteColName: "_PEERDB_IS_DELETED", SyncedAtColName: "_PEERDB_SYNCED_AT", @@ -997,15 +997,15 @@ func (s PeerFlowE2ETestSuiteSF) Test_Soft_Delete_UD_Same_Batch() { require.NoError(s.t, err) config := &protos.FlowConnectionConfigs{ - FlowJobName: s.attachSuffix(dstName), - Destination: s.sfHelper.Peer, + FlowJobName: s.attachSuffix(dstName), + DestinationName: s.Peer().Name, TableMappings: []*protos.TableMapping{ { SourceTableIdentifier: srcTableName, DestinationTableIdentifier: dstTableName, }, }, - Source: e2e.GeneratePostgresPeer(), + SourceName: e2e.GeneratePostgresPeer(s.t).Name, SoftDelete: true, SoftDeleteColName: "_PEERDB_IS_DELETED", SyncedAtColName: "_PEERDB_SYNCED_AT", @@ -1073,15 +1073,15 @@ func (s PeerFlowE2ETestSuiteSF) Test_Soft_Delete_Insert_After_Delete() { require.NoError(s.t, err) config := &protos.FlowConnectionConfigs{ - FlowJobName: s.attachSuffix(tableName), - Destination: s.sfHelper.Peer, + FlowJobName: s.attachSuffix(tableName), + DestinationName: s.Peer().Name, TableMappings: []*protos.TableMapping{ { SourceTableIdentifier: srcTableName, DestinationTableIdentifier: dstTableName, }, }, - Source: e2e.GeneratePostgresPeer(), + SourceName: e2e.GeneratePostgresPeer(s.t).Name, SoftDelete: true, SoftDeleteColName: "_PEERDB_IS_DELETED", SyncedAtColName: "_PEERDB_SYNCED_AT", @@ -1144,10 +1144,10 @@ func (s PeerFlowE2ETestSuiteSF) Test_Supported_Mixed_Case_Table_SF() { connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: s.attachSuffix("test_mixed_case"), TableNameMapping: map[string]string{srcTableName: dstTableName}, - Destination: s.sfHelper.Peer, + Destination: s.Peer().Name, } - flowConnConfig := connectionGen.GenerateFlowConnectionConfigs() + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs(s.t) flowConnConfig.MaxBatchSize = 100 // wait for PeerFlowStatusQuery to finish setup @@ -1197,8 +1197,8 @@ func (s PeerFlowE2ETestSuiteSF) Test_Column_Exclusion_With_Schema_Changes() { require.NoError(s.t, err) config := &protos.FlowConnectionConfigs{ - FlowJobName: s.attachSuffix(tableName), - Destination: s.sfHelper.Peer, + FlowJobName: s.attachSuffix(tableName), + DestinationName: s.Peer().Name, 
TableMappings: []*protos.TableMapping{ { SourceTableIdentifier: srcTableName, @@ -1206,7 +1206,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Column_Exclusion_With_Schema_Changes() { Exclude: []string{"c2"}, }, }, - Source: e2e.GeneratePostgresPeer(), + SourceName: e2e.GeneratePostgresPeer(s.t).Name, MaxBatchSize: 100, } diff --git a/flow/e2e/snowflake/qrep_flow_sf_test.go b/flow/e2e/snowflake/qrep_flow_sf_test.go index 63aa20aee1..7fed8ada0a 100644 --- a/flow/e2e/snowflake/qrep_flow_sf_test.go +++ b/flow/e2e/snowflake/qrep_flow_sf_test.go @@ -59,19 +59,19 @@ func (s PeerFlowE2ETestSuiteSF) Test_Complete_QRep_Flow_Avro_SF() { query := fmt.Sprintf("SELECT * FROM e2e_test_%s.%s WHERE updated_at BETWEEN {{.start}} AND {{.end}}", s.pgSuffix, tblName) - qrepConfig, err := e2e.CreateQRepWorkflowConfig( + qrepConfig := e2e.CreateQRepWorkflowConfig( + s.t, "test_qrep_flow_avro_sf", fmt.Sprintf("e2e_test_%s.%s", s.pgSuffix, tblName), dstSchemaQualified, query, - s.sfHelper.Peer, + s.Peer().Name, "", false, "", "", ) qrepConfig.SetupWatermarkTableOnDestination = true - require.NoError(s.t, err) env := e2e.RunQRepFlowWorkflow(tc, qrepConfig) e2e.EnvWaitForFinished(s.t, env, 3*time.Minute) @@ -80,8 +80,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Complete_QRep_Flow_Avro_SF() { sel := e2e.GetOwnersSelectorStringsSF() s.compareTableContentsWithDiffSelectorsSF(tblName, sel[0], sel[1]) - err = s.checkJSONValue(dstSchemaQualified, "f7", "key", "\"value\"") - require.NoError(s.t, err) + require.NoError(s.t, s.checkJSONValue(dstSchemaQualified, "f7", "key", "\"value\"")) } func (s PeerFlowE2ETestSuiteSF) Test_Complete_QRep_Flow_Avro_SF_Upsert_Simple() { @@ -97,12 +96,13 @@ func (s PeerFlowE2ETestSuiteSF) Test_Complete_QRep_Flow_Avro_SF_Upsert_Simple() query := fmt.Sprintf("SELECT * FROM e2e_test_%s.%s WHERE updated_at BETWEEN {{.start}} AND {{.end}}", s.pgSuffix, tblName) - qrepConfig, err := e2e.CreateQRepWorkflowConfig( + qrepConfig := e2e.CreateQRepWorkflowConfig( + s.t, "test_qrep_flow_avro_sf", fmt.Sprintf("e2e_test_%s.%s", s.pgSuffix, tblName), dstSchemaQualified, query, - s.sfHelper.Peer, + s.Peer().Name, "", false, "", @@ -113,7 +113,6 @@ func (s PeerFlowE2ETestSuiteSF) Test_Complete_QRep_Flow_Avro_SF_Upsert_Simple() UpsertKeyColumns: []string{"id"}, } qrepConfig.SetupWatermarkTableOnDestination = true - require.NoError(s.t, err) env := e2e.RunQRepFlowWorkflow(tc, qrepConfig) e2e.EnvWaitForFinished(s.t, env, 3*time.Minute) @@ -136,18 +135,18 @@ func (s PeerFlowE2ETestSuiteSF) Test_Complete_QRep_Flow_Avro_SF_S3() { query := fmt.Sprintf("SELECT * FROM e2e_test_%s.%s WHERE updated_at BETWEEN {{.start}} AND {{.end}}", s.pgSuffix, tblName) - qrepConfig, err := e2e.CreateQRepWorkflowConfig( + qrepConfig := e2e.CreateQRepWorkflowConfig( + s.t, "test_qrep_flow_avro_sf", s.attachSchemaSuffix(tblName), dstSchemaQualified, query, - s.sfHelper.Peer, + s.Peer().Name, "", false, "", "", ) - require.NoError(s.t, err) qrepConfig.StagingPath = fmt.Sprintf("s3://peerdb-test-bucket/avro/%s", uuid.New()) qrepConfig.SetupWatermarkTableOnDestination = true @@ -172,12 +171,13 @@ func (s PeerFlowE2ETestSuiteSF) Test_Complete_QRep_Flow_Avro_SF_Upsert_XMIN() { query := fmt.Sprintf("SELECT * FROM e2e_test_%s.%s", s.pgSuffix, tblName) - qrepConfig, err := e2e.CreateQRepWorkflowConfig( + qrepConfig := e2e.CreateQRepWorkflowConfig( + s.t, "test_qrep_flow_avro_sf_xmin", fmt.Sprintf("e2e_test_%s.%s", s.pgSuffix, tblName), dstSchemaQualified, query, - s.sfHelper.Peer, + s.Peer().Name, "", false, "", @@ -189,7 +189,6 @@ func (s 
PeerFlowE2ETestSuiteSF) Test_Complete_QRep_Flow_Avro_SF_Upsert_XMIN() { } qrepConfig.WatermarkColumn = "xmin" qrepConfig.SetupWatermarkTableOnDestination = true - require.NoError(s.t, err) env := e2e.RunXminFlowWorkflow(tc, qrepConfig) e2e.EnvWaitForFinished(s.t, env, 3*time.Minute) @@ -212,21 +211,20 @@ func (s PeerFlowE2ETestSuiteSF) Test_Complete_QRep_Flow_Avro_SF_S3_Integration() query := fmt.Sprintf("SELECT * FROM e2e_test_%s.%s WHERE updated_at BETWEEN {{.start}} AND {{.end}}", s.pgSuffix, tblName) - sfPeer := s.sfHelper.Peer - sfPeer.GetSnowflakeConfig().S3Integration = "peerdb_s3_integration" + s.sfHelper.Config.S3Integration = "peerdb_s3_integration" - qrepConfig, err := e2e.CreateQRepWorkflowConfig( + qrepConfig := e2e.CreateQRepWorkflowConfig( + s.t, "test_qrep_flow_avro_sf_int", s.attachSchemaSuffix(tblName), dstSchemaQualified, query, - sfPeer, + s.Peer().Name, "", false, "", "", ) - require.NoError(s.t, err) qrepConfig.StagingPath = fmt.Sprintf("s3://peerdb-test-bucket/avro/%s", uuid.New()) qrepConfig.SetupWatermarkTableOnDestination = true @@ -251,12 +249,13 @@ func (s PeerFlowE2ETestSuiteSF) Test_PeerDB_Columns_QRep_SF() { query := fmt.Sprintf("SELECT * FROM e2e_test_%s.%s WHERE updated_at BETWEEN {{.start}} AND {{.end}}", s.pgSuffix, tblName) - qrepConfig, err := e2e.CreateQRepWorkflowConfig( + qrepConfig := e2e.CreateQRepWorkflowConfig( + s.t, "test_columns_qrep_sf", fmt.Sprintf("e2e_test_%s.%s", s.pgSuffix, tblName), dstSchemaQualified, query, - s.sfHelper.Peer, + s.Peer().Name, "", true, "_PEERDB_SYNCED_AT", @@ -267,14 +266,12 @@ func (s PeerFlowE2ETestSuiteSF) Test_PeerDB_Columns_QRep_SF() { UpsertKeyColumns: []string{"id"}, } qrepConfig.SetupWatermarkTableOnDestination = true - require.NoError(s.t, err) env := e2e.RunQRepFlowWorkflow(tc, qrepConfig) e2e.EnvWaitForFinished(s.t, env, 3*time.Minute) require.NoError(s.t, env.Error()) - err = s.sfHelper.checkSyncedAt(`SELECT "_PEERDB_SYNCED_AT" FROM ` + dstSchemaQualified) - require.NoError(s.t, err) + require.NoError(s.t, s.sfHelper.checkSyncedAt(`SELECT "_PEERDB_SYNCED_AT" FROM `+dstSchemaQualified)) } func (s PeerFlowE2ETestSuiteSF) Test_Soft_Delete_Default_False_SF() { @@ -290,12 +287,13 @@ func (s PeerFlowE2ETestSuiteSF) Test_Soft_Delete_Default_False_SF() { query := fmt.Sprintf("SELECT * FROM e2e_test_%s.%s WHERE updated_at BETWEEN {{.start}} AND {{.end}}", s.pgSuffix, tblName) - qrepConfig, err := e2e.CreateQRepWorkflowConfig( + qrepConfig := e2e.CreateQRepWorkflowConfig( + s.t, "test_deleted_false_qrep_sf", fmt.Sprintf("e2e_test_%s.%s", s.pgSuffix, tblName), dstSchemaQualified, query, - s.sfHelper.Peer, + s.Peer().Name, "", true, "_PEERDB_SYNCED_AT", @@ -306,12 +304,10 @@ func (s PeerFlowE2ETestSuiteSF) Test_Soft_Delete_Default_False_SF() { UpsertKeyColumns: []string{"id"}, } qrepConfig.SetupWatermarkTableOnDestination = true - require.NoError(s.t, err) env := e2e.RunQRepFlowWorkflow(tc, qrepConfig) e2e.EnvWaitForFinished(s.t, env, 3*time.Minute) require.NoError(s.t, env.Error()) - err = s.sfHelper.checkIsDeleted(`SELECT "_PEERDB_IS_DELETED" FROM ` + dstSchemaQualified) - require.NoError(s.t, err) + require.NoError(s.t, s.sfHelper.checkIsDeleted(`SELECT "_PEERDB_IS_DELETED" FROM `+dstSchemaQualified)) } diff --git a/flow/e2e/snowflake/snowflake.go b/flow/e2e/snowflake/snowflake.go index cf02014a25..97b7451f54 100644 --- a/flow/e2e/snowflake/snowflake.go +++ b/flow/e2e/snowflake/snowflake.go @@ -49,7 +49,16 @@ func (s PeerFlowE2ETestSuiteSF) Suffix() string { } func (s PeerFlowE2ETestSuiteSF) Peer() *protos.Peer { - 
return s.sfHelper.Peer + s.t.Helper() + ret := &protos.Peer{ + Name: e2e.AddSuffix(s, "test_sf_peer"), + Type: protos.DBType_SNOWFLAKE, + Config: &protos.Peer_SnowflakeConfig{ + SnowflakeConfig: s.sfHelper.Config, + }, + } + e2e.CreatePeer(s.t, ret) + return ret } func (s PeerFlowE2ETestSuiteSF) DestinationTable(table string) string { @@ -76,7 +85,7 @@ func SetupSuite(t *testing.T) PeerFlowE2ETestSuiteSF { t.Fatalf("failed to setup Postgres: %v", err) } - sfHelper, err := NewSnowflakeTestHelper() + sfHelper, err := NewSnowflakeTestHelper(t) if err != nil { t.Fatalf("failed to setup Snowflake: %v", err) } diff --git a/flow/e2e/snowflake/snowflake_helper.go b/flow/e2e/snowflake/snowflake_helper.go index af380ef158..ca57b5b473 100644 --- a/flow/e2e/snowflake/snowflake_helper.go +++ b/flow/e2e/snowflake/snowflake_helper.go @@ -6,6 +6,7 @@ import ( "errors" "fmt" "os" + "testing" connsnowflake "github.com/PeerDB-io/peer-flow/connectors/snowflake" "github.com/PeerDB-io/peer-flow/e2eshared" @@ -18,8 +19,6 @@ import ( type SnowflakeTestHelper struct { // config is the Snowflake config. Config *protos.SnowflakeConfig - // peer struct holder Snowflake - Peer *protos.Peer // connection to another database, to manage the test database adminClient *connsnowflake.SnowflakeClient // connection to the test database @@ -30,7 +29,9 @@ type SnowflakeTestHelper struct { testDatabaseName string } -func NewSnowflakeTestHelper() (*SnowflakeTestHelper, error) { +func NewSnowflakeTestHelper(t *testing.T) (*SnowflakeTestHelper, error) { + t.Helper() + jsonPath := os.Getenv("TEST_SF_CREDS") if jsonPath == "" { return nil, errors.New("TEST_SF_CREDS env var not set") @@ -42,12 +43,10 @@ func NewSnowflakeTestHelper() (*SnowflakeTestHelper, error) { } var config *protos.SnowflakeConfig - err = json.Unmarshal(content, &config) - if err != nil { + if err := json.Unmarshal(content, &config); err != nil { return nil, fmt.Errorf("failed to unmarshal json: %w", err) } - peer := generateSFPeer(config) runID, err := shared.RandomUInt64() if err != nil { return nil, fmt.Errorf("failed to generate random uint64: %w", err) @@ -75,7 +74,6 @@ func NewSnowflakeTestHelper() (*SnowflakeTestHelper, error) { return &SnowflakeTestHelper{ Config: config, - Peer: peer, adminClient: adminClient, testClient: testClient, testSchemaName: "PUBLIC", @@ -83,18 +81,6 @@ func NewSnowflakeTestHelper() (*SnowflakeTestHelper, error) { }, nil } -func generateSFPeer(snowflakeConfig *protos.SnowflakeConfig) *protos.Peer { - ret := &protos.Peer{} - ret.Name = "test_sf_peer" - ret.Type = protos.DBType_SNOWFLAKE - - ret.Config = &protos.Peer_SnowflakeConfig{ - SnowflakeConfig: snowflakeConfig, - } - - return ret -} - // Cleanup drops the database. func (s *SnowflakeTestHelper) Cleanup() error { err := s.testClient.Close() @@ -114,32 +100,17 @@ func (s *SnowflakeTestHelper) RunCommand(command string) error { } // CountRows(tableName) returns the number of rows in the given table. -func (s *SnowflakeTestHelper) CountRows(tableName string) (int, error) { - res, err := s.testClient.CountRows(context.Background(), s.testSchemaName, tableName) - if err != nil { - return 0, err - } - - return int(res), nil +func (s *SnowflakeTestHelper) CountRows(tableName string) (int64, error) { + return s.testClient.CountRows(context.Background(), s.testSchemaName, tableName) } // CountRows(tableName) returns the non-null number of rows in the given table. 
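// Since Peer() now assembles and registers the Snowflake peer from s.sfHelper.Config
// at call time, per-test tweaks such as the S3Integration override above have to be
// applied before the first s.Peer() call, assuming e2e.CreatePeer persists the config
// when it runs. An illustrative ordering sketch (test-body shape only, not code from
// this patch):
//
//	s.sfHelper.Config.S3Integration = "peerdb_s3_integration" // adjust the shared config first
//	peerName := s.Peer().Name                                  // config is registered in the catalog here
//	// ...then build the QRep config referencing peerName...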
-func (s *SnowflakeTestHelper) CountNonNullRows(tableName string, columnName string) (int, error) { - res, err := s.testClient.CountNonNullRows(context.Background(), s.testSchemaName, tableName, columnName) - if err != nil { - return 0, err - } - - return int(res), nil +func (s *SnowflakeTestHelper) CountNonNullRows(tableName string, columnName string) (int64, error) { + return s.testClient.CountNonNullRows(context.Background(), s.testSchemaName, tableName, columnName) } -func (s *SnowflakeTestHelper) CountSRIDs(tableName string, columnName string) (int, error) { - res, err := s.testClient.CountSRIDs(context.Background(), s.testSchemaName, tableName, columnName) - if err != nil { - return 0, err - } - - return int(res), nil +func (s *SnowflakeTestHelper) CountSRIDs(tableName string, columnName string) (int64, error) { + return s.testClient.CountSRIDs(context.Background(), s.testSchemaName, tableName, columnName) } func (s *SnowflakeTestHelper) CheckNull(tableName string, colNames []string) (bool, error) { diff --git a/flow/e2e/snowflake/snowflake_schema_delta_test.go b/flow/e2e/snowflake/snowflake_schema_delta_test.go index 5e4addec53..aa15ac9910 100644 --- a/flow/e2e/snowflake/snowflake_schema_delta_test.go +++ b/flow/e2e/snowflake/snowflake_schema_delta_test.go @@ -28,7 +28,7 @@ type SnowflakeSchemaDeltaTestSuite struct { func setupSchemaDeltaSuite(t *testing.T) SnowflakeSchemaDeltaTestSuite { t.Helper() - sfTestHelper, err := NewSnowflakeTestHelper() + sfTestHelper, err := NewSnowflakeTestHelper(t) if err != nil { t.Fatalf("Error in test: %v", err) } diff --git a/flow/e2e/sqlserver/qrep_flow_sqlserver_test.go b/flow/e2e/sqlserver/qrep_flow_sqlserver_test.go index 8df35168b7..95193d6922 100644 --- a/flow/e2e/sqlserver/qrep_flow_sqlserver_test.go +++ b/flow/e2e/sqlserver/qrep_flow_sqlserver_test.go @@ -45,6 +45,19 @@ func (s PeerFlowE2ETestSuiteSQLServer) Suffix() string { return s.suffix } +func (s PeerFlowE2ETestSuiteSQLServer) Peer() *protos.Peer { + s.t.Helper() + ret := &protos.Peer{ + Name: e2e.AddSuffix(s, "sqlspeer"), + Type: protos.DBType_SQLSERVER, + Config: &protos.Peer_SqlserverConfig{ + SqlserverConfig: s.sqlsHelper.config, + }, + } + e2e.CreatePeer(s.t, ret) + return ret +} + func TestCDCFlowE2ETestSuiteSQLServer(t *testing.T) { e2eshared.RunSuite(t, SetupSuite) } @@ -72,7 +85,7 @@ func SetupSuite(t *testing.T) PeerFlowE2ETestSuiteSQLServer { if env != "true" { sqlsHelper = nil } else { - sqlsHelper, err = NewSQLServerHelper("test_sqlserver_peer") + sqlsHelper, err = NewSQLServerHelper() require.NoError(t, err) } @@ -155,12 +168,10 @@ func (s PeerFlowE2ETestSuiteSQLServer) Test_Complete_QRep_Flow_SqlServer_Append( query := fmt.Sprintf("SELECT * FROM %s.%s WHERE v_from BETWEEN {{.start}} AND {{.end}}", s.sqlsHelper.SchemaName, tblName) - postgresPeer := e2e.GeneratePostgresPeer() - qrepConfig := &protos.QRepConfig{ FlowJobName: tblName, - SourcePeer: s.sqlsHelper.GetPeer(), - DestinationPeer: postgresPeer, + SourceName: s.Peer().Name, + DestinationName: e2e.GeneratePostgresPeer(s.t).Name, DestinationTableIdentifier: dstTableName, Query: query, WatermarkTable: srcTableName, diff --git a/flow/e2e/sqlserver/sqlserver_helper.go b/flow/e2e/sqlserver/sqlserver_helper.go index f764b7accf..056922800c 100644 --- a/flow/e2e/sqlserver/sqlserver_helper.go +++ b/flow/e2e/sqlserver/sqlserver_helper.go @@ -14,15 +14,14 @@ import ( ) type SQLServerHelper struct { - peerName string - config *protos.SqlServerConfig + config *protos.SqlServerConfig E peersql.SQLQueryExecutor SchemaName string 
tables []string } -func NewSQLServerHelper(name string) (*SQLServerHelper, error) { +func NewSQLServerHelper() (*SQLServerHelper, error) { port, err := strconv.ParseUint(os.Getenv("SQLSERVER_PORT"), 10, 16) if err != nil { return nil, fmt.Errorf("invalid SQLSERVER_PORT: %s", os.Getenv("SQLSERVER_PORT")) @@ -52,13 +51,11 @@ func NewSQLServerHelper(name string) (*SQLServerHelper, error) { } testSchema := fmt.Sprintf("e2e_test_%d", rndNum) - err = connector.CreateSchema(context.Background(), testSchema) - if err != nil { + if err := connector.CreateSchema(context.Background(), testSchema); err != nil { return nil, err } return &SQLServerHelper{ - peerName: name, config: config, E: connector, SchemaName: testSchema, @@ -75,16 +72,6 @@ func (h *SQLServerHelper) CreateTable(schema *qvalue.QRecordSchema, tableName st return nil } -func (h *SQLServerHelper) GetPeer() *protos.Peer { - return &protos.Peer{ - Name: h.peerName, - Type: protos.DBType_SQLSERVER, - Config: &protos.Peer_SqlserverConfig{ - SqlserverConfig: h.config, - }, - } -} - func (h *SQLServerHelper) CleanUp() error { for _, tbl := range h.tables { err := h.E.ExecuteQuery(context.Background(), fmt.Sprintf("DROP TABLE %s.%s", h.SchemaName, tbl)) diff --git a/flow/e2e/test_utils.go b/flow/e2e/test_utils.go index 2678ffde9e..6a9462f0f1 100644 --- a/flow/e2e/test_utils.go +++ b/flow/e2e/test_utils.go @@ -398,33 +398,37 @@ func PopulateSourceTable(conn *pgx.Conn, suffix string, tableName string, rowCou } func CreateQRepWorkflowConfig( + t *testing.T, flowJobName string, sourceTable string, dstTable string, query string, - dest *protos.Peer, + dest string, stagingPath string, setupDst bool, syncedAtCol string, isDeletedCol string, -) (*protos.QRepConfig, error) { - connectionGen := QRepFlowConnectionGenerationConfig{ +) *protos.QRepConfig { + t.Helper() + + return &protos.QRepConfig{ FlowJobName: flowJobName, WatermarkTable: sourceTable, DestinationTableIdentifier: dstTable, - Destination: dest, + SourceName: GeneratePostgresPeer(t).Name, + DestinationName: dest, + Query: query, + WatermarkColumn: "updated_at", StagingPath: stagingPath, + WriteMode: &protos.QRepWriteMode{ + WriteType: protos.QRepWriteType_QREP_WRITE_MODE_APPEND, + }, + NumRowsPerPartition: 1000, + InitialCopyOnly: true, + SyncedAtColName: syncedAtCol, + SetupWatermarkTableOnDestination: setupDst, + SoftDeleteColName: isDeletedCol, } - - watermark := "updated_at" - - qrepConfig := connectionGen.GenerateQRepConfig(query, watermark) - qrepConfig.InitialCopyOnly = true - qrepConfig.SyncedAtColName = syncedAtCol - qrepConfig.SetupWatermarkTableOnDestination = setupDst - qrepConfig.SoftDeleteColName = isDeletedCol - - return qrepConfig, nil } func RunQRepFlowWorkflow(tc client.Client, config *protos.QRepConfig) WorkflowRun { diff --git a/flow/workflows/cdc_flow.go b/flow/workflows/cdc_flow.go index b3776fd2fc..03f0813f32 100644 --- a/flow/workflows/cdc_flow.go +++ b/flow/workflows/cdc_flow.go @@ -1,3 +1,4 @@ +//nolint:staticcheck // TODO remove in 0.15 package peerflow import ( @@ -182,34 +183,6 @@ func addCdcPropertiesSignalListener( }) } -func reloadPeers(ctx workflow.Context, logger log.Logger, cfg *protos.FlowConnectionConfigs) error { - reloadPeersCtx := workflow.WithActivityOptions(ctx, workflow.ActivityOptions{ - StartToCloseTimeout: 5 * time.Minute, - }) - - logger.Info("reloading source peer", slog.String("peerName", cfg.Source.Name)) - srcFuture := workflow.ExecuteActivity(reloadPeersCtx, flowable.LoadPeer, cfg.Source.Name) - var srcPeer *protos.Peer - if err := 
srcFuture.Get(reloadPeersCtx, &srcPeer); err != nil { - logger.Error("failed to load source peer", slog.Any("error", err)) - return fmt.Errorf("failed to load source peer: %w", err) - } - logger.Info("reloaded peer", slog.String("peerName", cfg.Source.Name)) - - logger.Info("reloading destination peer", slog.String("peerName", cfg.Destination.Name)) - dstFuture := workflow.ExecuteActivity(reloadPeersCtx, flowable.LoadPeer, cfg.Destination.Name) - var dstPeer *protos.Peer - if err := dstFuture.Get(reloadPeersCtx, &dstPeer); err != nil { - logger.Error("failed to load destination peer", slog.Any("error", err)) - return fmt.Errorf("failed to load destination peer: %w", err) - } - logger.Info("reloaded peer", slog.String("peerName", cfg.Destination.Name)) - - cfg.Source = srcPeer - cfg.Destination = dstPeer - return nil -} - func CDCFlowWorkflow( ctx workflow.Context, cfg *protos.FlowConnectionConfigs, @@ -270,13 +243,6 @@ func CDCFlowWorkflow( return state, err } - // reload peers in case of EDIT PEER - err := reloadPeers(ctx, logger, cfg) - if err != nil { - logger.Error("failed to reload peers", slog.Any("error", err)) - return state, fmt.Errorf("failed to reload peers: %w", err) - } - if state.FlowConfigUpdate != nil { err = processCDCFlowConfigUpdate(ctx, logger, cfg, state, mirrorNameSearch) if err != nil { @@ -294,6 +260,30 @@ func CDCFlowWorkflow( state.CurrentFlowStatus = protos.FlowStatus_STATUS_RUNNING } + // TODO remove fields in 0.15 + state.RelationMessageMapping = nil + save_cfg := false + if cfg.Source != nil { + cfg.SourceName = cfg.Source.Name + cfg.Source = nil + save_cfg = true + } + if cfg.Destination != nil { + cfg.DestinationName = cfg.Destination.Name + cfg.Destination = nil + save_cfg = true + } + if save_cfg { + saveCtx := workflow.WithActivityOptions(ctx, workflow.ActivityOptions{ + StartToCloseTimeout: time.Hour, + HeartbeatTimeout: time.Minute, + }) + saveFuture := workflow.ExecuteActivity(saveCtx, flowable.UpdateCdcFlowConfigInCatalog, cfg) + if err := saveFuture.Get(saveCtx, nil); err != nil { + return state, fmt.Errorf("failed to save updated config: %w", err) + } + } + originalRunID := workflow.GetInfo(ctx).OriginalRunID // we cannot skip SetupFlow if SnapshotFlow did not complete in cases where Resync is enabled @@ -364,13 +354,16 @@ func CDCFlowWorkflow( } if cfg.Resync { - renameOpts := &protos.RenameTablesInput{} - renameOpts.FlowJobName = cfg.FlowJobName - renameOpts.Peer = cfg.Destination - if cfg.SoftDelete { + renameOpts := &protos.RenameTablesInput{ + FlowJobName: cfg.FlowJobName, + PeerName: cfg.DestinationName, + } + if cfg.SyncedAtColName != "" { + renameOpts.SyncedAtColName = &cfg.SyncedAtColName + } + if cfg.SoftDelete && cfg.SoftDeleteColName != "" { renameOpts.SoftDeleteColName = &cfg.SoftDeleteColName } - renameOpts.SyncedAtColName = &cfg.SyncedAtColName correctedTableNameSchemaMapping := make(map[string]*protos.TableSchema) for _, mapping := range state.SyncFlowOptions.TableMappings { oldName := mapping.DestinationTableIdentifier diff --git a/flow/workflows/local_activities.go b/flow/workflows/local_activities.go index 3b7c2441f1..8ce01ea55f 100644 --- a/flow/workflows/local_activities.go +++ b/flow/workflows/local_activities.go @@ -1,12 +1,15 @@ package peerflow import ( + "context" "log/slog" "time" "go.temporal.io/sdk/log" "go.temporal.io/sdk/workflow" + "github.com/PeerDB-io/peer-flow/connectors" + "github.com/PeerDB-io/peer-flow/generated/protos" "github.com/PeerDB-io/peer-flow/peerdbenv" ) @@ -41,3 +44,22 @@ func 
getMaxSyncsPerCDCFlow(wCtx workflow.Context, logger log.Logger) uint32 { } return maxSyncsPerCDCFlow } + +func localPeerType(ctx context.Context, name string) (protos.DBType, error) { + pool, err := peerdbenv.GetCatalogConnectionPoolFromEnv(ctx) + if err != nil { + return 0, err + } + return connectors.LoadPeerType(ctx, pool, name) +} + +func getPeerType(wCtx workflow.Context, name string) (protos.DBType, error) { + checkCtx := workflow.WithLocalActivityOptions(wCtx, workflow.LocalActivityOptions{ + StartToCloseTimeout: time.Minute, + }) + + getFuture := workflow.ExecuteLocalActivity(checkCtx, localPeerType, name) + var dbtype protos.DBType + err := getFuture.Get(checkCtx, &dbtype) + return dbtype, err +} diff --git a/flow/workflows/qrep_flow.go b/flow/workflows/qrep_flow.go index 6ddb45cf25..94d03d1099 100644 --- a/flow/workflows/qrep_flow.go +++ b/flow/workflows/qrep_flow.go @@ -1,3 +1,4 @@ +//nolint:staticcheck // TODO remove in 0.15 package peerflow import ( @@ -11,7 +12,6 @@ import ( "go.temporal.io/sdk/temporal" "go.temporal.io/sdk/workflow" - "github.com/PeerDB-io/peer-flow/activities" "github.com/PeerDB-io/peer-flow/generated/protos" "github.com/PeerDB-io/peer-flow/model" "github.com/PeerDB-io/peer-flow/shared" @@ -105,10 +105,10 @@ func (q *QRepFlowExecution) getTableSchema(ctx workflow.Context, tableName strin }) tableSchemaInput := &protos.GetTableSchemaBatchInput{ - PeerConnectionConfig: q.config.SourcePeer, - TableIdentifiers: []string{tableName}, - FlowName: q.config.FlowJobName, - System: q.config.System, + PeerName: q.config.SourceName, + TableIdentifiers: []string{tableName}, + FlowName: q.config.FlowJobName, + System: q.config.System, } future := workflow.ExecuteActivity(ctx, flowable.GetTableSchema, tableSchemaInput) @@ -145,7 +145,7 @@ func (q *QRepFlowExecution) setupWatermarkTableOnDestination(ctx workflow.Contex // now setup the normalized tables on the destination peer setupConfig := &protos.SetupNormalizedTableBatchInput{ - PeerConnectionConfig: q.config.DestinationPeer, + PeerName: q.config.DestinationName, TableNameSchemaMapping: map[string]*protos.TableSchema{ q.config.DestinationTableIdentifier: watermarkTableSchema, }, @@ -369,7 +369,7 @@ func (q *QRepFlowExecution) handleTableCreationForResync(ctx workflow.Context, s createTablesFromExistingFuture := workflow.ExecuteActivity( createTablesFromExistingCtx, flowable.CreateTablesFromExisting, &protos.CreateTablesFromExistingInput{ FlowJobName: q.config.FlowJobName, - Peer: q.config.DestinationPeer, + PeerName: q.config.DestinationName, NewToExistingTableMapping: map[string]string{ renamedTableIdentifier: q.config.DestinationTableIdentifier, }, @@ -385,9 +385,10 @@ func (q *QRepFlowExecution) handleTableCreationForResync(ctx workflow.Context, s func (q *QRepFlowExecution) handleTableRenameForResync(ctx workflow.Context, state *protos.QRepFlowState) error { if state.NeedsResync && q.config.DstTableFullResync { oldTableIdentifier := strings.TrimSuffix(q.config.DestinationTableIdentifier, "_peerdb_resync") - renameOpts := &protos.RenameTablesInput{} - renameOpts.FlowJobName = q.config.FlowJobName - renameOpts.Peer = q.config.DestinationPeer + renameOpts := &protos.RenameTablesInput{ + FlowJobName: q.config.FlowJobName, + PeerName: q.config.DestinationName, + } tblSchema, err := q.getTableSchema(ctx, q.config.DestinationTableIdentifier) if err != nil { @@ -467,12 +468,11 @@ func QRepWaitForNewRowsWorkflow(ctx workflow.Context, config *protos.QRepConfig, }, }) - var result activities.QRepWaitUntilNewRowsResult - err := 
workflow.ExecuteActivity(ctx, flowable.QRepHasNewRows, config, lastPartition).Get(ctx, &result) + var hasNewRows bool + err := workflow.ExecuteActivity(ctx, flowable.QRepHasNewRows, config, lastPartition).Get(ctx, &hasNewRows) if err != nil { return fmt.Errorf("error checking for new rows: %w", err) } - hasNewRows := result.Found // If no new rows are found, continue as new if !hasNewRows { @@ -522,7 +522,6 @@ func QRepFlowWorkflow( signalChan := model.FlowSignal.GetSignalChannel(ctx) q := newQRepFlowExecution(ctx, config, originalRunID) - logger := q.logger if state.CurrentFlowStatus == protos.FlowStatus_STATUS_PAUSING || state.CurrentFlowStatus == protos.FlowStatus_STATUS_PAUSED { @@ -531,7 +530,7 @@ func QRepFlowWorkflow( state.CurrentFlowStatus = protos.FlowStatus_STATUS_PAUSED for q.activeSignal == model.PauseSignal { - logger.Info(fmt.Sprintf("mirror has been paused for %s", time.Since(startTime).Round(time.Second))) + q.logger.Info(fmt.Sprintf("mirror has been paused for %s", time.Since(startTime).Round(time.Second))) // only place we block on receive, so signal processing is immediate val, ok, _ := signalChan.ReceiveWithTimeout(ctx, 1*time.Minute) if ok { @@ -543,6 +542,30 @@ func QRepFlowWorkflow( state.CurrentFlowStatus = protos.FlowStatus_STATUS_RUNNING } + // TODO remove fields in 0.15 + state.DisableWaitForNewRows = false + save_cfg := false + if config.SourcePeer != nil { + config.SourceName = config.SourcePeer.Name + config.SourcePeer = nil + save_cfg = true + } + if config.DestinationPeer != nil { + config.DestinationName = config.DestinationPeer.Name + config.DestinationPeer = nil + save_cfg = true + } + if save_cfg { + saveCtx := workflow.WithActivityOptions(ctx, workflow.ActivityOptions{ + StartToCloseTimeout: time.Hour, + HeartbeatTimeout: time.Minute, + }) + saveFuture := workflow.ExecuteActivity(saveCtx, flowable.UpdateQRepFlowConfigInCatalog, config) + if err := saveFuture.Get(saveCtx, nil); err != nil { + return state, fmt.Errorf("failed to save updated config: %w", err) + } + } + maxParallelWorkers := 16 if config.MaxParallelWorkers > 0 { maxParallelWorkers = int(config.MaxParallelWorkers) @@ -557,7 +580,7 @@ func QRepFlowWorkflow( if err != nil { return state, fmt.Errorf("failed to setup metadata tables: %w", err) } - logger.Info("metadata tables setup for peer flow") + q.logger.Info("metadata tables setup for peer flow") err = q.handleTableCreationForResync(ctx, state) if err != nil { @@ -571,24 +594,24 @@ func QRepFlowWorkflow( } if q.activeSignal != model.PauseSignal { - logger.Info("fetching partitions to replicate for peer flow") + q.logger.Info("fetching partitions to replicate for peer flow") partitions, err := q.getPartitions(ctx, state.LastPartition) if err != nil { return state, fmt.Errorf("failed to get partitions: %w", err) } - logger.Info(fmt.Sprintf("%d partitions to replicate", len(partitions.Partitions))) + q.logger.Info(fmt.Sprintf("%d partitions to replicate", len(partitions.Partitions))) if err := q.processPartitions(ctx, maxParallelWorkers, partitions.Partitions); err != nil { return state, err } - logger.Info("consolidating partitions for peer flow") + q.logger.Info("consolidating partitions for peer flow") if err := q.consolidatePartitions(ctx); err != nil { return state, err } if config.InitialCopyOnly { - logger.Info("initial copy completed for peer flow") + q.logger.Info("initial copy completed for peer flow") return state, nil } @@ -597,7 +620,7 @@ func QRepFlowWorkflow( return state, err } - logger.Info(fmt.Sprintf("%d partitions 
processed", len(partitions.Partitions))) + q.logger.Info(fmt.Sprintf("%d partitions processed", len(partitions.Partitions))) state.NumPartitionsProcessed += uint64(len(partitions.Partitions)) if len(partitions.Partitions) > 0 { @@ -614,7 +637,7 @@ func QRepFlowWorkflow( q.activeSignal = model.FlowSignalHandler(q.activeSignal, val, q.logger) } - logger.Info("Continuing as new workflow", + q.logger.Info("Continuing as new workflow", slog.Any("Last Partition", state.LastPartition), slog.Uint64("Number of Partitions Processed", state.NumPartitionsProcessed)) diff --git a/flow/workflows/setup_flow.go b/flow/workflows/setup_flow.go index 5df1746f4f..3600db83a3 100644 --- a/flow/workflows/setup_flow.go +++ b/flow/workflows/setup_flow.go @@ -61,7 +61,7 @@ func (s *SetupFlowExecution) checkConnectionsAndSetupMetadataTables( // first check the source peer connection srcConnStatusFuture := workflow.ExecuteLocalActivity(checkCtx, flowable.CheckConnection, &protos.SetupInput{ - Peer: config.Source, + PeerName: config.SourceName, FlowName: config.FlowJobName, }) var srcConnStatus activities.CheckConnectionResult @@ -70,7 +70,7 @@ func (s *SetupFlowExecution) checkConnectionsAndSetupMetadataTables( } dstSetupInput := &protos.SetupInput{ - Peer: config.Destination, + PeerName: config.DestinationName, FlowName: config.FlowJobName, } @@ -117,7 +117,7 @@ func (s *SetupFlowExecution) ensurePullability( // create EnsurePullabilityInput for the srcTableName ensurePullabilityInput := &protos.EnsurePullabilityBatchInput{ - PeerConnectionConfig: config.Source, + PeerName: config.SourceName, FlowJobName: s.cdcFlowName, SourceTableIdentifiers: srcTblIdentifiers, CheckConstraints: checkConstraints, @@ -153,9 +153,9 @@ func (s *SetupFlowExecution) createRawTable( // attempt to create the tables. 
createRawTblInput := &protos.CreateRawTableInput{ - PeerConnectionConfig: config.Destination, - FlowJobName: s.cdcFlowName, - TableNameMapping: s.tableNameMapping, + PeerName: config.DestinationName, + FlowJobName: s.cdcFlowName, + TableNameMapping: s.tableNameMapping, } rawTblFuture := workflow.ExecuteActivity(ctx, flowable.CreateRawTable, createRawTblInput) @@ -182,10 +182,10 @@ func (s *SetupFlowExecution) fetchTableSchemaAndSetupNormalizedTables( sort.Strings(sourceTables) tableSchemaInput := &protos.GetTableSchemaBatchInput{ - PeerConnectionConfig: flowConnectionConfigs.Source, - TableIdentifiers: sourceTables, - FlowName: s.cdcFlowName, - System: flowConnectionConfigs.System, + PeerName: flowConnectionConfigs.SourceName, + TableIdentifiers: sourceTables, + FlowName: s.cdcFlowName, + System: flowConnectionConfigs.System, } future := workflow.ExecuteActivity(ctx, flowable.GetTableSchema, tableSchemaInput) @@ -206,7 +206,7 @@ func (s *SetupFlowExecution) fetchTableSchemaAndSetupNormalizedTables( // now setup the normalized tables on the destination peer setupConfig := &protos.SetupNormalizedTableBatchInput{ - PeerConnectionConfig: flowConnectionConfigs.Destination, + PeerName: flowConnectionConfigs.DestinationName, TableNameSchemaMapping: normalizedTableMapping, SoftDeleteColName: flowConnectionConfigs.SoftDeleteColName, SyncedAtColName: flowConnectionConfigs.SyncedAtColName, @@ -235,12 +235,10 @@ func (s *SetupFlowExecution) executeSetupFlow( return nil, fmt.Errorf("failed to check connections and setup metadata tables: %w", err) } - setupFlowOutput := protos.SetupFlowOutput{} srcTableIdNameMapping, err := s.ensurePullability(ctx, config, !config.InitialSnapshotOnly) if err != nil { return nil, fmt.Errorf("failed to ensure pullability: %w", err) } - setupFlowOutput.SrcTableIdNameMapping = srcTableIdNameMapping // for initial copy only flows, we don't need to create the raw table if !config.InitialSnapshotOnly { @@ -255,9 +253,11 @@ func (s *SetupFlowExecution) executeSetupFlow( if err != nil { return nil, fmt.Errorf("failed to fetch table schema and setup normalized tables: %w", err) } - setupFlowOutput.TableNameSchemaMapping = tableNameSchemaMapping - return &setupFlowOutput, nil + return &protos.SetupFlowOutput{ + SrcTableIdNameMapping: srcTableIdNameMapping, + TableNameSchemaMapping: tableNameSchemaMapping, + }, nil } // SetupFlowWorkflow is the workflow that sets up the flow. diff --git a/flow/workflows/snapshot_flow.go b/flow/workflows/snapshot_flow.go index 5616350079..32ecb8335f 100644 --- a/flow/workflows/snapshot_flow.go +++ b/flow/workflows/snapshot_flow.go @@ -33,14 +33,6 @@ type SnapshotFlowExecution struct { tableNameSchemaMapping map[string]*protos.TableSchema } -type cloneTablesInput struct { - slotName string - snapshotName string - snapshotType snapshotType - supportsTIDScans bool - maxParallelClones int -} - // ensurePullability ensures that the source peer is pullable. 
func (s *SnapshotFlowExecution) setupReplication( ctx workflow.Context, @@ -61,7 +53,7 @@ func (s *SnapshotFlowExecution) setupReplication( } setupReplicationInput := &protos.SetupReplicationInput{ - PeerConnectionConfig: s.config.Source, + PeerName: s.config.SourceName, FlowJobName: flowName, TableNameMapping: tblNameMapping, DoInitialSnapshot: s.config.DoInitialSnapshot, @@ -75,7 +67,7 @@ func (s *SnapshotFlowExecution) setupReplication( return nil, fmt.Errorf("failed to setup replication on source peer: %w", err) } - s.logger.Info("replication slot live for on source for peer flow") + s.logger.Info("replication slot live on source for peer flow") return res, nil } @@ -83,14 +75,13 @@ func (s *SnapshotFlowExecution) setupReplication( func (s *SnapshotFlowExecution) closeSlotKeepAlive( ctx workflow.Context, ) error { - flowName := s.config.FlowJobName s.logger.Info("closing slot keep alive for peer flow") ctx = workflow.WithActivityOptions(ctx, workflow.ActivityOptions{ StartToCloseTimeout: 15 * time.Minute, }) - if err := workflow.ExecuteActivity(ctx, snapshot.CloseSlotKeepAlive, flowName).Get(ctx, nil); err != nil { + if err := workflow.ExecuteActivity(ctx, snapshot.CloseSlotKeepAlive, s.config.FlowJobName).Get(ctx, nil); err != nil { return fmt.Errorf("failed to close slot keep alive for peer flow: %w", err) } @@ -127,11 +118,6 @@ func (s *SnapshotFlowExecution) cloneTable( TaskQueue: taskQueue, }) - // we know that the source is postgres as setup replication output is non-nil - // only for postgres - sourcePostgres := s.config.Source - sourcePostgres.GetPostgresConfig().TransactionSnapshot = snapshotName - parsedSrcTable, err := utils.ParseSchemaTable(srcName) if err != nil { s.logger.Error("unable to parse source table", slog.Any("error", err), cloneLog) @@ -175,7 +161,11 @@ func (s *SnapshotFlowExecution) cloneTable( } // ensure document IDs are synchronized across initial load and CDC // for the same document - if s.config.Destination.Type == protos.DBType_ELASTICSEARCH { + dbtype, err := getPeerType(ctx, s.config.DestinationName) + if err != nil { + return err + } + if dbtype == protos.DBType_ELASTICSEARCH { snapshotWriteMode = &protos.QRepWriteMode{ WriteType: protos.QRepWriteType_QREP_WRITE_MODE_UPSERT, UpsertKeyColumns: s.tableNameSchemaMapping[mapping.DestinationTableIdentifier].PrimaryKeyColumns, @@ -184,12 +174,13 @@ func (s *SnapshotFlowExecution) cloneTable( config := &protos.QRepConfig{ FlowJobName: childWorkflowID, - SourcePeer: sourcePostgres, - DestinationPeer: s.config.Destination, + SourceName: s.config.SourceName, + DestinationName: s.config.DestinationName, Query: query, WatermarkColumn: mapping.PartitionKey, WatermarkTable: srcName, InitialCopyOnly: true, + SnapshotName: snapshotName, DestinationTableIdentifier: dstName, NumRowsPerPartition: numRowsPerPartition, MaxParallelWorkers: numWorkers, @@ -207,25 +198,28 @@ func (s *SnapshotFlowExecution) cloneTable( func (s *SnapshotFlowExecution) cloneTables( ctx workflow.Context, - cloneTablesInput *cloneTablesInput, + snapshotType snapshotType, + slotName string, + snapshotName string, + supportsTIDScans bool, + maxParallelClones int, ) error { - if cloneTablesInput.snapshotType == SNAPSHOT_TYPE_SLOT { + if snapshotType == SNAPSHOT_TYPE_SLOT { s.logger.Info(fmt.Sprintf("cloning tables for slot name %s and snapshotName %s", - cloneTablesInput.slotName, cloneTablesInput.snapshotName)) - } else if cloneTablesInput.snapshotType == SNAPSHOT_TYPE_TX { + slotName, snapshotName)) + } else if snapshotType == SNAPSHOT_TYPE_TX { 
s.logger.Info("cloning tables in txn snapshot mode with snapshotName " + - cloneTablesInput.snapshotName) + snapshotName) } - boundSelector := shared.NewBoundSelector(ctx, "CloneTablesSelector", cloneTablesInput.maxParallelClones) + boundSelector := shared.NewBoundSelector(ctx, "CloneTablesSelector", maxParallelClones) defaultPartitionCol := "ctid" - if !cloneTablesInput.supportsTIDScans { + if !supportsTIDScans { s.logger.Info("Postgres version too old for TID scans, might use full table partitions!") defaultPartitionCol = "" } - snapshotName := cloneTablesInput.snapshotName for _, v := range s.config.TableMappings { source := v.SourceTableIdentifier destination := v.DestinationTableIdentifier @@ -258,20 +252,19 @@ func (s *SnapshotFlowExecution) cloneTablesWithSlot( sessionCtx workflow.Context, numTablesInParallel int, ) error { - logger := s.logger slotInfo, err := s.setupReplication(sessionCtx) if err != nil { return fmt.Errorf("failed to setup replication: %w", err) } - logger.Info(fmt.Sprintf("cloning %d tables in parallel", numTablesInParallel)) - if err := s.cloneTables(ctx, &cloneTablesInput{ - snapshotType: SNAPSHOT_TYPE_SLOT, - slotName: slotInfo.SlotName, - snapshotName: slotInfo.SnapshotName, - supportsTIDScans: slotInfo.SupportsTidScans, - maxParallelClones: numTablesInParallel, - }); err != nil { + s.logger.Info(fmt.Sprintf("cloning %d tables in parallel", numTablesInParallel)) + if err := s.cloneTables(ctx, + SNAPSHOT_TYPE_SLOT, + slotInfo.SlotName, + slotInfo.SnapshotName, + slotInfo.SupportsTidScans, + numTablesInParallel, + ); err != nil { return fmt.Errorf("failed to clone tables: %w", err) } @@ -291,7 +284,8 @@ func SnapshotFlowWorkflow( config: config, tableNameSchemaMapping: tableNameSchemaMapping, logger: log.With(workflow.GetLogger(ctx), - slog.String(string(shared.FlowNameKey), config.FlowJobName)), + slog.String(string(shared.FlowNameKey), config.FlowJobName), + slog.String("sourcePeer", config.SourceName)), } numTablesInParallel := int(max(config.SnapshotNumTablesInParallel, 1)) @@ -334,7 +328,7 @@ func SnapshotFlowWorkflow( exportCtx, snapshot.MaintainTx, sessionInfo.SessionID, - config.Source, + config.SourceName, ) fExportSnapshot := workflow.ExecuteActivity( @@ -362,13 +356,13 @@ func SnapshotFlowWorkflow( return sessionError } - if err := se.cloneTables(ctx, &cloneTablesInput{ - snapshotType: SNAPSHOT_TYPE_TX, - slotName: "", - snapshotName: txnSnapshotState.SnapshotName, - supportsTIDScans: txnSnapshotState.SupportsTIDScans, - maxParallelClones: numTablesInParallel, - }); err != nil { + if err := se.cloneTables(ctx, + SNAPSHOT_TYPE_TX, + "", + txnSnapshotState.SnapshotName, + txnSnapshotState.SupportsTIDScans, + numTablesInParallel, + ); err != nil { return fmt.Errorf("failed to clone tables: %w", err) } } else if err := se.cloneTablesWithSlot(ctx, sessionCtx, numTablesInParallel); err != nil { diff --git a/flow/workflows/sync_flow.go b/flow/workflows/sync_flow.go index d5b9704254..63acb3766a 100644 --- a/flow/workflows/sync_flow.go +++ b/flow/workflows/sync_flow.go @@ -146,10 +146,10 @@ func SyncFlowWorkflow( }) getModifiedSchemaFuture := workflow.ExecuteActivity(getModifiedSchemaCtx, flowable.GetTableSchema, &protos.GetTableSchemaBatchInput{ - PeerConnectionConfig: config.Source, - TableIdentifiers: modifiedSrcTables, - FlowName: config.FlowJobName, - System: config.System, + PeerName: config.SourceName, + TableIdentifiers: modifiedSrcTables, + FlowName: config.FlowJobName, + System: config.System, }) var getModifiedSchemaRes 
*protos.GetTableSchemaBatchOutput diff --git a/nexus/analyzer/src/lib.rs b/nexus/analyzer/src/lib.rs index c843a17b46..fbb4951d54 100644 --- a/nexus/analyzer/src/lib.rs +++ b/nexus/analyzer/src/lib.rs @@ -675,7 +675,6 @@ fn parse_db_options(db_type: DbType, with_options: &[SqlOption]) -> anyhow::Resu .context("no default database specified")? .to_string(), metadata_schema: opts.get("metadata_schema").map(|s| s.to_string()), - transaction_snapshot: "".to_string(), ssh_config: ssh_fields, }; diff --git a/nexus/catalog/src/lib.rs b/nexus/catalog/src/lib.rs index a817ba3bd8..f25754496b 100644 --- a/nexus/catalog/src/lib.rs +++ b/nexus/catalog/src/lib.rs @@ -51,8 +51,8 @@ pub struct CatalogConfig<'a> { #[derive(Debug, Clone)] pub struct WorkflowDetails { pub workflow_id: String, - pub source_peer: pt::peerdb_peers::Peer, - pub destination_peer: pt::peerdb_peers::Peer, + pub source_peer: String, + pub destination_peer: String, } impl<'a> CatalogConfig<'a> { @@ -64,7 +64,6 @@ impl<'a> CatalogConfig<'a> { user: self.user.to_string(), password: self.password.to_string(), database: self.database.to_string(), - transaction_snapshot: "".to_string(), metadata_schema: Some("".to_string()), ssh_config: None, } @@ -229,6 +228,25 @@ impl Catalog { } } + pub async fn get_peer_name_by_id(&self, peer_id: i32) -> anyhow::Result { + let stmt = self + .pg + .prepare_typed( + "SELECT name FROM public.peers WHERE id = $1", + &[], + ) + .await?; + + let row = self.pg.query_opt(&stmt, &[&peer_id]).await?; + if let Some(row) = row { + let name: String = row.get(0); + Ok(name) + } else { + Err(anyhow::anyhow!("No peer with id {} found", peer_id)) + } + + } + pub async fn get_peer_by_id(&self, peer_id: i32) -> anyhow::Result { let stmt = self .pg @@ -238,9 +256,8 @@ impl Catalog { ) .await?; - let rows = self.pg.query(&stmt, &[&peer_id]).await?; - - if let Some(row) = rows.first() { + let row = self.pg.query_opt(&stmt, &[&peer_id]).await?; + if let Some(row) = row { let name: &str = row.get(0); let peer_type: i32 = row.get(1); let options: &[u8] = row.get(2); @@ -439,9 +456,9 @@ impl Catalog { &self, flow_job_name: &str, ) -> anyhow::Result> { - let rows = self + let row = self .pg - .query( + .query_opt( "SELECT workflow_id, source_peer, destination_peer FROM public.flows WHERE NAME = $1", &[&flow_job_name], ) @@ -449,36 +466,35 @@ impl Catalog { // currently multiple rows for a flow job exist in catalog, but all mapped to same workflow id // CHANGE LOGIC IF THIS ASSUMPTION CHANGES - if rows.is_empty() { + if let Some(first_row) = row { + let workflow_id: Option = first_row.get(0); + let Some(workflow_id) = workflow_id else { + return Err(anyhow!( + "workflow id not found for existing flow job {}", + flow_job_name + )); + }; + let source_peer_id: i32 = first_row.get(1); + let destination_peer_id: i32 = first_row.get(2); + + let source_peer = self + .get_peer_name_by_id(source_peer_id) + .await + .context("unable to get source peer")?; + let destination_peer = self + .get_peer_name_by_id(destination_peer_id) + .await + .context("unable to get destination peer")?; + + Ok(Some(WorkflowDetails { + workflow_id, + source_peer, + destination_peer, + })) + } else { tracing::info!("no workflow id found for flow job {}", flow_job_name); - return Ok(None); + Ok(None) } - - let first_row = rows.first().unwrap(); - let workflow_id: Option = first_row.get(0); - let Some(workflow_id) = workflow_id else { - return Err(anyhow!( - "workflow id not found for existing flow job {}", - flow_job_name - )); - }; - let source_peer_id: i32 = 
first_row.get(1); - let destination_peer_id: i32 = first_row.get(2); - - let source_peer = self - .get_peer_by_id(source_peer_id) - .await - .context("unable to get source peer")?; - let destination_peer = self - .get_peer_by_id(destination_peer_id) - .await - .context("unable to get destination peer")?; - - Ok(Some(WorkflowDetails { - workflow_id, - source_peer, - destination_peer, - })) } pub async fn delete_flow_job_entry(&self, flow_job_name: &str) -> anyhow::Result<()> { diff --git a/nexus/flow-rs/src/grpc.rs b/nexus/flow-rs/src/grpc.rs index cbb0e10a0c..15e0130dd4 100644 --- a/nexus/flow-rs/src/grpc.rs +++ b/nexus/flow-rs/src/grpc.rs @@ -111,8 +111,8 @@ impl FlowGrpcClient { let state_change_req = pt::peerdb_route::FlowStateChangeRequest { flow_job_name: flow_job_name.to_owned(), requested_flow_state: state.into(), - source_peer: Some(workflow_details.source_peer), - destination_peer: Some(workflow_details.destination_peer), + source_peer: workflow_details.source_peer, + destination_peer: workflow_details.destination_peer, flow_config_update, }; let response = self.client.flow_state_change(state_change_req).await?; @@ -130,8 +130,8 @@ impl FlowGrpcClient { pub async fn start_peer_flow_job( &mut self, job: &FlowJob, - src: pt::peerdb_peers::Peer, - dst: pt::peerdb_peers::Peer, + src: String, + dst: String, ) -> anyhow::Result { let table_mappings: Vec = job .table_mappings @@ -154,9 +154,12 @@ impl FlowGrpcClient { return anyhow::Result::Err(anyhow::anyhow!("invalid system {}", job.system)); }; + #[allow(deprecated)] let mut flow_conn_cfg = pt::peerdb_flow::FlowConnectionConfigs { - source: Some(src), - destination: Some(dst), + source: None, + destination: None, + source_name: src, + destination_name: dst, flow_job_name: job.name.clone(), table_mappings, do_initial_snapshot, @@ -196,12 +199,12 @@ impl FlowGrpcClient { pub async fn start_qrep_flow_job( &mut self, job: &QRepFlowJob, - src: pt::peerdb_peers::Peer, - dst: pt::peerdb_peers::Peer, + src: String, + dst: String, ) -> anyhow::Result { let mut cfg = pt::peerdb_flow::QRepConfig { - source_peer: Some(src), - destination_peer: Some(dst), + source_name: src, + destination_name: dst, flow_job_name: job.name.clone(), query: job.query_string.clone(), ..Default::default() diff --git a/nexus/server/src/main.rs b/nexus/server/src/main.rs index 2a555741c1..bb0ba3f1ff 100644 --- a/nexus/server/src/main.rs +++ b/nexus/server/src/main.rs @@ -13,7 +13,6 @@ use clap::Parser; use cursor::PeerCursors; use dashmap::{mapref::entry::Entry as DashEntry, DashMap}; use flow_rs::grpc::{FlowGrpcClient, PeerValidationResult}; -use futures::join; use peer_connections::{PeerConnectionTracker, PeerConnections}; use peer_cursor::{ util::{records_to_query_response, sendable_stream_to_query_response}, @@ -159,13 +158,6 @@ impl NexusBackend { Ok(workflow_details) } - async fn get_peer_of_mirror(catalog: &Catalog, peer_name: &str) -> PgWireResult { - let peer = catalog.get_peer(peer_name).await.map_err(|err| { - PgWireError::ApiError(format!("unable to get peer {:?}: {:?}", peer_name, err).into()) - })?; - Ok(peer) - } - fn handle_mirror_existence( if_not_exists: bool, flow_name: &str, @@ -416,18 +408,10 @@ impl NexusBackend { } } - // get source and destination peers - let (src_peer, dst_peer) = join!( - Self::get_peer_of_mirror(self.catalog.as_ref(), &flow_job.source_peer), - Self::get_peer_of_mirror(self.catalog.as_ref(), &flow_job.target_peer), - ); - let src_peer = src_peer?; - let dst_peer = dst_peer?; - // make a request to the flow service to start the 
job. let mut flow_handler = self.flow_handler.as_ref().unwrap().lock().await; flow_handler - .start_peer_flow_job(flow_job, src_peer, dst_peer) + .start_peer_flow_job(flow_job, flow_job.source_peer.clone(), flow_job.target_peer.clone()) .await .map_err(|err| { PgWireError::ApiError( @@ -775,23 +759,10 @@ impl NexusBackend { } async fn run_qrep_mirror(&self, qrep_flow_job: &QRepFlowJob) -> PgWireResult { - let (src_peer, dst_peer) = join!( - self.catalog.get_peer(&qrep_flow_job.source_peer), - self.catalog.get_peer(&qrep_flow_job.target_peer), - ); - // get source and destination peers - let src_peer = src_peer.map_err(|err| { - PgWireError::ApiError(format!("unable to get source peer: {:?}", err).into()) - })?; - - let dst_peer = dst_peer.map_err(|err| { - PgWireError::ApiError(format!("unable to get destination peer: {:?}", err).into()) - })?; - // make a request to the flow service to start the job. let mut flow_handler = self.flow_handler.as_ref().unwrap().lock().await; let workflow_id = flow_handler - .start_qrep_flow_job(qrep_flow_job, src_peer, dst_peer) + .start_qrep_flow_job(qrep_flow_job, qrep_flow_job.source_peer.clone(), qrep_flow_job.target_peer.clone()) .await .map_err(|err| { PgWireError::ApiError(format!("unable to submit job: {:?}", err).into()) diff --git a/protos/flow.proto b/protos/flow.proto index 0545ee114f..dc89510dfd 100644 --- a/protos/flow.proto +++ b/protos/flow.proto @@ -32,16 +32,15 @@ message TableMapping { } message SetupInput { - peerdb_peers.Peer peer = 1; string flow_name = 2; + string peer_name = 3; } message FlowConnectionConfigs { string flow_job_name = 1; - // source and destination peer - peerdb_peers.Peer source = 2; - peerdb_peers.Peer destination = 3; + peerdb_peers.Peer source = 2 [deprecated = true]; + peerdb_peers.Peer destination = 3 [deprecated = true]; // config for the CDC flow itself // currently, TableMappings, MaxBatchSize and IdleTimeoutSeconds are dynamic via Temporal signals @@ -71,8 +70,11 @@ message FlowConnectionConfigs { string synced_at_col_name = 19; string script = 20; - TypeSystem system = 21; + + // source and destination peer + string source_name = 22; + string destination_name = 23; } message RenameTableOption { @@ -83,10 +85,10 @@ message RenameTableOption { message RenameTablesInput { string flow_job_name = 1; - peerdb_peers.Peer peer = 2; repeated RenameTableOption rename_table_options = 3; optional string soft_delete_col_name = 4; optional string synced_at_col_name = 5; + string peer_name = 6; } message RenameTablesOutput { @@ -95,8 +97,8 @@ message RenameTablesOutput { message CreateTablesFromExistingInput { string flow_job_name = 1; - peerdb_peers.Peer peer = 2; map new_to_existing_table_mapping = 3; + string peer_name = 4; } message CreateTablesFromExistingOutput { @@ -105,8 +107,7 @@ message CreateTablesFromExistingOutput { message SyncFlowOptions { uint32 batch_size = 1; - // deprecated field - map relation_message_mapping = 2; + map relation_message_mapping = 2 [deprecated = true]; uint64 idle_timeout_seconds = 3; map src_table_id_name_mapping = 4; map table_name_schema_mapping = 5; @@ -121,10 +122,10 @@ message StartNormalizeInput { } message EnsurePullabilityBatchInput { - peerdb_peers.Peer peer_connection_config = 1; string flow_job_name = 2; repeated string source_table_identifiers = 3; bool check_constraints = 4; + string peer_name = 5; } message PostgresTableIdentifier { @@ -136,14 +137,15 @@ message EnsurePullabilityBatchOutput { } message SetupReplicationInput { - peerdb_peers.Peer peer_connection_config = 
1; string flow_job_name = 2; map table_name_mapping = 3; + // replicate to destination using ctid - peerdb_peers.Peer destination_peer = 4; bool do_initial_snapshot = 5; string existing_publication_name = 6; string existing_replication_slot_name = 7; + string peer_name = 8; + string destination_name = 9; } message SetupReplicationOutput { @@ -153,9 +155,9 @@ message SetupReplicationOutput { } message CreateRawTableInput { - peerdb_peers.Peer peer_connection_config = 1; string flow_job_name = 2; map table_name_mapping = 3; + string peer_name = 4; } message CreateRawTableOutput { string table_identifier = 1; } @@ -175,10 +177,10 @@ message FieldDescription { } message GetTableSchemaBatchInput { - peerdb_peers.Peer peer_connection_config = 1; repeated string table_identifiers = 2; string flow_name = 3; TypeSystem system = 4; + string peer_name = 5; } message GetTableSchemaBatchOutput { @@ -186,13 +188,13 @@ message GetTableSchemaBatchOutput { } message SetupNormalizedTableBatchInput { - peerdb_peers.Peer peer_connection_config = 1; map table_name_schema_mapping = 2; // migration related columns string soft_delete_col_name = 4; string synced_at_col_name = 5; string flow_name = 6; + string peer_name = 7; } message SetupNormalizedTableOutput { @@ -255,8 +257,8 @@ enum TypeSystem { message QRepConfig { string flow_job_name = 1; - peerdb_peers.Peer source_peer = 2; - peerdb_peers.Peer destination_peer = 3; + peerdb_peers.Peer source_peer = 2 [deprecated = true]; + peerdb_peers.Peer destination_peer = 3 [deprecated = true]; string destination_table_identifier = 4; @@ -299,6 +301,10 @@ message QRepConfig { TypeSystem system = 18; string script = 19; + + string source_name = 20; + string destination_name = 21; + string snapshot_name = 23; } message QRepPartition { @@ -331,7 +337,7 @@ message QRepFlowState { QRepPartition last_partition = 1; uint64 num_partitions_processed = 2; bool needs_resync = 3; - bool disable_wait_for_new_rows = 4; // deprecated + bool disable_wait_for_new_rows = 4 [deprecated = true]; FlowStatus current_flow_status = 5; } diff --git a/protos/peers.proto b/protos/peers.proto index 13cabf58cf..eb7ac528bd 100644 --- a/protos/peers.proto +++ b/protos/peers.proto @@ -70,8 +70,6 @@ message PostgresConfig { string user = 3; string password = 4; string database = 5; - // this is used only in query replication mode right now. 
- string transaction_snapshot = 6; // defaults to _peerdb_internal optional string metadata_schema = 7; optional SSHConfig ssh_config = 8; diff --git a/protos/route.proto b/protos/route.proto index 06834fecec..fac4f50a7e 100644 --- a/protos/route.proto +++ b/protos/route.proto @@ -40,8 +40,8 @@ message CreateCustomSyncResponse { message ShutdownRequest { string workflow_id = 1; string flow_job_name = 2; - peerdb_peers.Peer source_peer = 3; - peerdb_peers.Peer destination_peer = 4; + string source_peer = 3; + string destination_peer = 4; bool remove_flow_entry = 5; } @@ -206,6 +206,8 @@ message CDCMirrorStatus { peerdb_flow.FlowConnectionConfigs config = 1; SnapshotStatus snapshot_status = 2; repeated CDCSyncStatus cdc_syncs = 3; + peerdb_peers.DBType source_type = 4; + peerdb_peers.DBType destination_type = 5; } message MirrorStatusResponse { @@ -226,8 +228,8 @@ message ValidateCDCMirrorResponse{ message FlowStateChangeRequest { string flow_job_name = 1; peerdb_flow.FlowStatus requested_flow_state = 2; - peerdb_peers.Peer source_peer = 3; - peerdb_peers.Peer destination_peer = 4; + string source_peer = 3; + string destination_peer = 4; // only can be sent in certain situations optional peerdb_flow.FlowConfigUpdate flow_config_update = 5; } diff --git a/ui/app/api/alert-config/route.ts b/ui/app/api/alert-config/route.ts index 9b8ff019ed..6d87d648bd 100644 --- a/ui/app/api/alert-config/route.ts +++ b/ui/app/api/alert-config/route.ts @@ -19,12 +19,8 @@ export async function POST(request: Request) { service_config: alertConfigReq.serviceConfig, }, }); - let createStatus: 'success' | 'error' = 'error'; - if (createRes.id) { - createStatus = 'success'; - } - return new Response(createStatus); + return new Response(createRes.id ? 'success' : 'error'); } export async function DELETE(request: Request) { @@ -34,12 +30,8 @@ export async function DELETE(request: Request) { id: configDeleteReq.id, }, }); - let deleteStatus: 'success' | 'error' = 'error'; - if (deleteRes.id) { - deleteStatus = 'success'; - } - return new Response(deleteStatus); + return new Response(deleteRes.id ? 'success' : 'error'); } export async function PUT(request: Request) { @@ -53,9 +45,6 @@ export async function PUT(request: Request) { id: alertConfigReq.id, }, }); - let editStatus: 'success' | 'error' = 'error'; - if (editRes.id) { - editStatus = 'success'; - } - return new Response(editStatus); + + return new Response(editRes.id ? 
'success' : 'error'); } diff --git a/ui/app/api/mirrors/route.ts b/ui/app/api/mirrors/route.ts index 65e5e3fe6c..4bc2df4a2f 100644 --- a/ui/app/api/mirrors/route.ts +++ b/ui/app/api/mirrors/route.ts @@ -1,9 +1,9 @@ -import { getTruePeer } from '@/app/api/peers/getTruePeer'; +import { MirrorsListing } from '@/app/dto/MirrorsDTO'; import prisma from '@/app/utils/prisma'; export const dynamic = 'force-dynamic'; -export async function GET(request: Request) { +export async function GET(_request: Request) { const mirrors = await prisma.flows.findMany({ distinct: 'name', include: { @@ -12,14 +12,16 @@ export async function GET(request: Request) { }, }); - // using any as type because of the way prisma returns data - const flows = mirrors?.map((mirror: any) => { - let newMirror: any = { - ...mirror, - sourcePeer: getTruePeer(mirror.sourcePeer), - destinationPeer: getTruePeer(mirror.destinationPeer), - }; - return newMirror; - }); + const flows: MirrorsListing[] = mirrors?.map((mirror) => ({ + id: mirror.id, + workflowId: mirror.workflow_id, + name: mirror.name, + sourceName: mirror.sourcePeer.name, + sourceType: mirror.sourcePeer.type, + destinationName: mirror.destinationPeer.name, + destinationType: mirror.destinationPeer.type, + createdAt: mirror.created_at, + isCdc: !mirror.query_string, + })); return new Response(JSON.stringify(flows)); } diff --git a/ui/app/api/peers/getTruePeer.ts b/ui/app/api/peers/getTruePeer.ts index c8b5c55a37..55435a30d0 100644 --- a/ui/app/api/peers/getTruePeer.ts +++ b/ui/app/api/peers/getTruePeer.ts @@ -3,7 +3,6 @@ import { BigqueryConfig, ClickhouseConfig, ElasticsearchConfig, - EventHubConfig, EventHubGroupConfig, KafkaConfig, MySqlConfig, @@ -21,63 +20,39 @@ export const getTruePeer = (peer: CatalogPeer) => { type: peer.type, }; const options = peer.options; - let config: - | BigqueryConfig - | ClickhouseConfig - | EventHubConfig - | EventHubGroupConfig - | KafkaConfig - | MySqlConfig - | PostgresConfig - | PubSubConfig - | S3Config - | SnowflakeConfig - | SqlServerConfig - | ElasticsearchConfig; switch (peer.type) { case 0: - config = BigqueryConfig.decode(options); - newPeer.bigqueryConfig = config; + newPeer.bigqueryConfig = BigqueryConfig.decode(options); break; case 1: - config = SnowflakeConfig.decode(options); - newPeer.snowflakeConfig = config; + newPeer.snowflakeConfig = SnowflakeConfig.decode(options); break; case 3: - config = PostgresConfig.decode(options); - newPeer.postgresConfig = config; + newPeer.postgresConfig = PostgresConfig.decode(options); break; case 5: - config = S3Config.decode(options); - newPeer.s3Config = config; + newPeer.s3Config = S3Config.decode(options); break; case 6: - config = SqlServerConfig.decode(options); - newPeer.sqlserverConfig = config; + newPeer.sqlserverConfig = SqlServerConfig.decode(options); break; case 7: - config = MySqlConfig.decode(options); - newPeer.mysqlConfig = config; + newPeer.mysqlConfig = MySqlConfig.decode(options); break; case 8: - config = ClickhouseConfig.decode(options); - newPeer.clickhouseConfig = config; + newPeer.clickhouseConfig = ClickhouseConfig.decode(options); break; case 9: - config = KafkaConfig.decode(options); - newPeer.kafkaConfig = config; + newPeer.kafkaConfig = KafkaConfig.decode(options); break; case 10: - config = PubSubConfig.decode(options); - newPeer.pubsubConfig = config; + newPeer.pubsubConfig = PubSubConfig.decode(options); break; case 11: - config = EventHubGroupConfig.decode(options); - newPeer.eventhubGroupConfig = config; + newPeer.eventhubGroupConfig = 
EventHubGroupConfig.decode(options); break; case 12: - config = ElasticsearchConfig.decode(options); - newPeer.elasticsearchConfig = config; + newPeer.elasticsearchConfig = ElasticsearchConfig.decode(options); break; default: return newPeer; diff --git a/ui/app/api/peers/info/[peerName]/route.ts b/ui/app/api/peers/info/[peerName]/route.ts index 54ccb45446..04ad7ee36c 100644 --- a/ui/app/api/peers/info/[peerName]/route.ts +++ b/ui/app/api/peers/info/[peerName]/route.ts @@ -5,7 +5,7 @@ import { ElasticsearchAuthType } from '@/grpc_generated/peers'; import { getTruePeer } from '../../getTruePeer'; export async function GET( - request: NextRequest, + _request: NextRequest, context: { params: { peerName: string } } ) { const peerName = context.params.peerName; @@ -31,7 +31,6 @@ export async function GET( const redactString = '********'; if (pgConfig) { pgConfig.password = redactString; - pgConfig.transactionSnapshot = redactString; if (pgConfig.sshConfig) { pgConfig.sshConfig.password = redactString; diff --git a/ui/app/api/peers/route.ts b/ui/app/api/peers/route.ts index 4f468d8fb2..607f3658ae 100644 --- a/ui/app/api/peers/route.ts +++ b/ui/app/api/peers/route.ts @@ -1,6 +1,4 @@ -import { getTruePeer } from '@/app/api/peers/getTruePeer'; import { - CatalogPeer, PeerConfig, UCreatePeerResponse, UValidatePeerResponse, @@ -151,8 +149,9 @@ export async function POST(request: Request) { } // GET all the peers from the database -export async function GET(request: Request) { - const peers = await prisma.peers.findMany(); - const truePeers: Peer[] = peers.map((peer: CatalogPeer) => getTruePeer(peer)); - return new Response(JSON.stringify(truePeers)); +export async function GET(_request: Request) { + const peers = await prisma.peers.findMany({ + select: { name: true, type: true }, + }); + return new Response(JSON.stringify(peers)); } diff --git a/ui/app/dto/MirrorsDTO.ts b/ui/app/dto/MirrorsDTO.ts index 18c3d35ad6..4a59ddba62 100644 --- a/ui/app/dto/MirrorsDTO.ts +++ b/ui/app/dto/MirrorsDTO.ts @@ -58,3 +58,15 @@ export type MirrorLogsType = { error_type: string; error_timestamp: Date; }[]; + +export type MirrorsListing = { + id: number; + workflowId: string | null; + name: string; + sourceName: string; + sourceType: number; + destinationName: string; + destinationType: number; + createdAt: string | Date; + isCdc: boolean; +}; diff --git a/ui/app/dto/PeersDTO.ts b/ui/app/dto/PeersDTO.ts index abc3bc80fb..81c559e77c 100644 --- a/ui/app/dto/PeersDTO.ts +++ b/ui/app/dto/PeersDTO.ts @@ -1,6 +1,7 @@ import { BigqueryConfig, ClickhouseConfig, + DBType, ElasticsearchConfig, EventHubConfig, EventHubGroupConfig, @@ -60,6 +61,7 @@ export type CatalogPeer = { type: number; options: Buffer; }; +export type PeerRef = { name: string; type: DBType }; export type PeerSetter = React.Dispatch>; export type SlotLagPoint = { diff --git a/ui/app/mirrors/[mirrorId]/cdc.tsx b/ui/app/mirrors/[mirrorId]/cdc.tsx index ca407f8c90..6e9b557896 100644 --- a/ui/app/mirrors/[mirrorId]/cdc.tsx +++ b/ui/app/mirrors/[mirrorId]/cdc.tsx @@ -65,7 +65,7 @@ export function CDCMirror({ diff --git a/ui/app/mirrors/[mirrorId]/cdcDetails.tsx b/ui/app/mirrors/[mirrorId]/cdcDetails.tsx index 456bacb0c7..b431b52f2e 100644 --- a/ui/app/mirrors/[mirrorId]/cdcDetails.tsx +++ b/ui/app/mirrors/[mirrorId]/cdcDetails.tsx @@ -3,8 +3,9 @@ import { SyncStatusRow } from '@/app/dto/MirrorsDTO'; import MirrorInfo from '@/components/MirrorInfo'; import PeerButton from '@/components/PeerComponent'; import TimeLabel from '@/components/TimeComponent'; -import { 
FlowConnectionConfigs, FlowStatus } from '@/grpc_generated/flow'; +import { FlowStatus } from '@/grpc_generated/flow'; import { dBTypeFromJSON } from '@/grpc_generated/peers'; +import { CDCMirrorStatus } from '@/grpc_generated/route'; import { Label } from '@/lib/Label'; import { ProgressCircle } from '@/lib/ProgressCircle'; import Link from 'next/link'; @@ -16,7 +17,7 @@ import TablePairs from './tablePairs'; type props = { syncs: SyncStatusRow[]; - mirrorConfig: FlowConnectionConfigs; + mirrorConfig: CDCMirrorStatus; createdAt?: Date; mirrorStatus: FlowStatus; }; @@ -30,12 +31,14 @@ function CdcDetails({ syncs, createdAt, mirrorConfig, mirrorStatus }: props) { return acc; }, 0); - const tablesSynced = mirrorConfig.tableMappings; + const tablesSynced = mirrorConfig.config?.tableMappings; useEffect(() => { - getCurrentIdleTimeout(mirrorConfig.flowJobName).then((res) => { - getSyncInterval(res); - }); - }, [mirrorConfig.flowJobName]); + getCurrentIdleTimeout(mirrorConfig.config?.flowJobName ?? '').then( + (res) => { + getSyncInterval(res); + } + ); + }, [mirrorConfig.config?.flowJobName]); return ( <>
@@ -54,7 +57,9 @@ function CdcDetails({ syncs, createdAt, mirrorConfig, mirrorStatus }: props) {
           border: '1px solid rgba(0,0,0,0.1)',
         }}
       >
-
+
@@ -77,8 +82,8 @@ function CdcDetails({ syncs, createdAt, mirrorConfig, mirrorStatus }: props) {
@@ -90,8 +95,8 @@ function CdcDetails({ syncs, createdAt, mirrorConfig, mirrorStatus }: props) {
@@ -129,7 +134,7 @@ function CdcDetails({ syncs, createdAt, mirrorConfig, mirrorStatus }: props) {
-
+
diff --git a/ui/app/mirrors/[mirrorId]/edit/page.tsx b/ui/app/mirrors/[mirrorId]/edit/page.tsx index 3a2f1df7e1..be44228cc4 100644 --- a/ui/app/mirrors/[mirrorId]/edit/page.tsx +++ b/ui/app/mirrors/[mirrorId]/edit/page.tsx @@ -88,8 +88,8 @@ const EditMirror = ({ params: { mirrorId } }: EditMirrorProps) => { setLoading(true); const req: FlowStateChangeRequest = { flowJobName: mirrorId, - sourcePeer: mirrorState.cdcStatus?.config?.source, - destinationPeer: mirrorState.cdcStatus?.config?.destination, + sourcePeer: mirrorState.cdcStatus?.config?.sourceName ?? '', + destinationPeer: mirrorState.cdcStatus?.config?.destinationName ?? '', requestedFlowState: FlowStatus.STATUS_UNKNOWN, flowConfigUpdate: { cdcFlowConfigUpdate: { ...config, additionalTables }, @@ -187,8 +187,8 @@ const EditMirror = ({ params: { mirrorId } }: EditMirrorProps) => { )} { const req: FlowStateChangeRequest = { flowJobName: mirrorConfig.flowJobName, - sourcePeer: mirrorConfig.source, - destinationPeer: mirrorConfig.destination, + sourcePeer: mirrorConfig.sourceName, + destinationPeer: mirrorConfig.destinationName, requestedFlowState: flowState, }; await fetch(`/api/mirrors/state_change`, { diff --git a/ui/app/mirrors/[mirrorId]/page.tsx b/ui/app/mirrors/[mirrorId]/page.tsx index 523dbc8c3d..182ac5ddd7 100644 --- a/ui/app/mirrors/[mirrorId]/page.tsx +++ b/ui/app/mirrors/[mirrorId]/page.tsx @@ -94,7 +94,7 @@ export default async function ViewMirror({ ); - const dbType = mirrorConfig.destination!.type; + const dbType = mirrorStatus.cdcStatus.destinationType; const isNotPaused = mirrorStatus.currentFlowState.toString() !== diff --git a/ui/app/mirrors/create/cdc/cdc.tsx b/ui/app/mirrors/create/cdc/cdc.tsx index d55ba671ff..63af9e6e6c 100644 --- a/ui/app/mirrors/create/cdc/cdc.tsx +++ b/ui/app/mirrors/create/cdc/cdc.tsx @@ -12,6 +12,8 @@ import TableMapping from './tablemapping'; interface MirrorConfigProps { settings: MirrorSetting[]; mirrorConfig: CDCConfig; + destinationType: DBType; + sourceType: DBType; setter: MirrorSetter; rows: TableMapRow[]; setRows: Dispatch>; @@ -33,6 +35,8 @@ export const defaultSyncMode = (dtype: DBType | undefined) => { export default function CDCConfigForm({ settings, mirrorConfig, + destinationType, + sourceType, setter, rows, setRows, @@ -45,22 +49,24 @@ export default function CDCConfigForm({ setting.stateHandler(stateVal, setter); }; - const normalSettings = useMemo(() => { - return settings!.filter( - (setting) => - !( - (IsQueuePeer(mirrorConfig.destination?.type) && - setting.advanced === AdvancedSettingType.QUEUE) || - setting.advanced === AdvancedSettingType.ALL - ) - ); - }, [settings, mirrorConfig.destination?.type]); + const normalSettings = useMemo( + () => + settings!.filter( + (setting) => + !( + (IsQueuePeer(destinationType) && + setting.advanced === AdvancedSettingType.QUEUE) || + setting.advanced === AdvancedSettingType.ALL + ) + ), + [settings, destinationType] + ); const advancedSettings = useMemo(() => { return settings! 
.map((setting) => { if ( - IsQueuePeer(mirrorConfig.destination?.type) && + IsQueuePeer(destinationType) && setting.advanced === AdvancedSettingType.QUEUE ) { setting.stateHandler(600, setter); @@ -71,30 +77,29 @@ export default function CDCConfigForm({ } }) .filter((setting) => setting !== undefined); - }, [settings, mirrorConfig.destination?.type, setter]); + }, [settings, destinationType, setter]); const paramDisplayCondition = (setting: MirrorSetting) => { const label = setting.label.toLowerCase(); - const isQueue = IsQueuePeer(mirrorConfig.destination?.type); + const isQueue = IsQueuePeer(destinationType); if ( (label.includes('snapshot') && mirrorConfig.doInitialSnapshot !== true) || (label === 'replication slot name' && mirrorConfig.doInitialSnapshot === true) || (label.includes('staging path') && - defaultSyncMode(mirrorConfig.destination?.type) !== 'AVRO') || + defaultSyncMode(destinationType) !== 'AVRO') || (isQueue && label.includes('soft delete')) || - (mirrorConfig.destination?.type === DBType.EVENTHUBS && + (destinationType === DBType.EVENTHUBS && (label.includes('initial copy') || label.includes('initial load') || label.includes('snapshot'))) || - ((mirrorConfig.source?.type !== DBType.POSTGRES || - mirrorConfig.destination?.type !== DBType.POSTGRES) && + ((sourceType !== DBType.POSTGRES || + destinationType !== DBType.POSTGRES) && label.includes('type system')) || - (mirrorConfig.destination?.type !== DBType.BIGQUERY && - label.includes('column name')) || + (destinationType !== DBType.BIGQUERY && label.includes('column name')) || (label.includes('soft delete') && ![DBType.BIGQUERY, DBType.POSTGRES, DBType.SNOWFLAKE].includes( - mirrorConfig.destination?.type ?? DBType.UNRECOGNIZED + destinationType ?? DBType.UNRECOGNIZED )) ) { return false; @@ -104,17 +109,17 @@ export default function CDCConfigForm({ useEffect(() => { setPubLoading(true); - fetchPublications(mirrorConfig.source?.name || '').then((pubs) => { + fetchPublications(mirrorConfig.sourceName ?? '').then((pubs) => { setPublications(pubs); setPubLoading(false); }); - }, [mirrorConfig.source?.name]); + }, [mirrorConfig.sourceName]); - if (mirrorConfig.source != undefined && mirrorConfig.destination != undefined) + if (mirrorConfig.sourceName && mirrorConfig.destinationName) return ( <> - {normalSettings!.map((setting, id) => { - return ( + {normalSettings!.map( + (setting, id) => paramDisplayCondition(setting!) && ( ) - ); - })} + )}