Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: integrate iceberg initial load support via jvm grpc #1792

Draft
wants to merge 35 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 18 commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
27f7fb5
feat: integrate iceberg support via jvm grpc
iamKunalGupta Jun 5, 2024
94fffcd
feat: Enhance Iceberg support and refactor codebase
iamKunalGupta Jun 11, 2024
68af0b9
chore: nit fixes
iamKunalGupta Jun 11, 2024
81cd311
chore: merge branch 'main' into feat/iceberg-support
iamKunalGupta Jun 11, 2024
4c88206
chore: some MORE nit fixes
iamKunalGupta Jun 11, 2024
004c989
chore: update quarkus cli version
iamKunalGupta Jun 11, 2024
225e30b
chore: fix pr comments
iamKunalGupta Jun 11, 2024
5a62168
fix: build and add multi-stage docker file
iamKunalGupta Jun 11, 2024
c28c10e
feat(iceberg): add hive3 support and test GCS integration
iamKunalGupta Jun 13, 2024
f72adde
chore: remove check for sdkman version
iamKunalGupta Jun 13, 2024
77d18f3
chore: switch from sdkman to setupo-java
iamKunalGupta Jun 13, 2024
9db73e7
chore: add explicit proto dependency
iamKunalGupta Jun 13, 2024
fb955ec
chore: merge branch 'main' into feat/iceberg-support
iamKunalGupta Jun 13, 2024
1bbe868
chore: remove extra log setting
iamKunalGupta Jun 13, 2024
7fe52e7
feat: Enhance logging and dependency configurations
iamKunalGupta Jun 14, 2024
de5c69c
fix: Correct namespace existence check in IcebergService
iamKunalGupta Jun 14, 2024
efef826
feat(iceberg): add streaming support to improve performance and memor…
iamKunalGupta Jun 17, 2024
d2ab4a3
fix: path style access
iamKunalGupta Jun 17, 2024
2e781de
Iceberg peer UI (#1840)
Amogh-Bharadwaj Jun 18, 2024
a8642b6
chore: merge branch 'main' into feat/iceberg-support
iamKunalGupta Jun 18, 2024
06c68e5
feat: Improve error handling and record appending in IcebergService
iamKunalGupta Jun 20, 2024
93830a0
chore: add editor config for jvm
iamKunalGupta Jun 20, 2024
5bbb18e
chore: try switching gh runner to fix build
iamKunalGupta Jun 20, 2024
237ac32
chore: lint fix
iamKunalGupta Jun 20, 2024
a6c83db
feat(LockManager): change scope from Dependent to ApplicationScoped
iamKunalGupta Jun 24, 2024
e0939b8
chore: merge branch 'main' into feat/iceberg-support
iamKunalGupta Jun 24, 2024
79129a9
feat: enable streaming and nit fixes
iamKunalGupta Jun 24, 2024
55a19ab
fix: UI showing Iceberg as unrecognized
iamKunalGupta Jun 24, 2024
2dff0bc
fix: setup for iceberg
iamKunalGupta Jun 24, 2024
6ea5137
chore: dependency updates
iamKunalGupta Jun 24, 2024
78a5468
chore: nit fixes
iamKunalGupta Jun 24, 2024
1f8aad5
chore: update quarkus cli version in sdkmanrc
iamKunalGupta Jun 24, 2024
0c5d016
chore: cleaning up code
iamKunalGupta Jun 25, 2024
74bca1b
feat: identifier fields support
iamKunalGupta Jun 25, 2024
cb190cb
chore: lint fix
iamKunalGupta Jun 25, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,12 @@ ui/.next
.gitignore
.github
.gitmodules


build
*.jar
.idea
.gradle
.dockerignore
.sdkmanrc

2 changes: 1 addition & 1 deletion .github/actions/genprotos/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ runs:
./flow/generated/protos
./nexus/pt/src/gen
./ui/grpc_generated
key: ${{ runner.os }}-build-genprotos-${{ hashFiles('./protos/peers.proto', './protos/flow.proto', './protos/route.proto') }}
key: ${{ runner.os }}-build-genprotos-${{ hashFiles('./protos/peers.proto', './protos/flow.proto', './protos/route.proto', './protos/flow-jvm.proto') }}

- if: steps.cache.outputs.cache-hit != 'true'
uses: actions/setup-go@v5
Expand Down
38 changes: 38 additions & 0 deletions .github/workflows/flow-jvm-build.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# CI workflow for the JVM flow service: generates the gRPC sources, then builds and tests flow-jvm.
name: Build & Test Flow JVM

on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main
    # Pull requests only trigger this workflow when one of these paths changes.
    paths:
      - 'flow-jvm/**'
      - 'protos/**'
      - '.sdkmanrc'

jobs:
  build-test:
    name: Build & Test Flow JVM
    strategy:
      matrix:
        runner:
          - ubicloud-standard-2-ubuntu-2204-arm
    runs-on: ${{ matrix.runner }}
    steps:
      - name: checkout
        uses: actions/checkout@v4

      # Temurin JDK 21 with the Gradle dependency cache enabled.
      - uses: actions/setup-java@v4
        with:
          distribution: "temurin"
          java-version: "21"
          cache: "gradle"

      # Print the tool versions up front to make build logs easier to debug.
      - name: Check dependency versions
        working-directory: flow-jvm
        run: |
          java -version
          gradle -version

      # The wrapper is generated here with the runner's `gradle` before `./gradlew` is used below.
      - name: Download/Setup gradle wrapper
        working-directory: flow-jvm
        run: |
          gradle wrapper

      # Code generation runs as a separate Gradle invocation before the build and tests.
      - name: Build
        working-directory: flow-jvm
        run: ./gradlew quarkusGenerateCode --stacktrace --info && ./gradlew build test --stacktrace --info
5 changes: 5 additions & 0 deletions .sdkmanrc
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Enable auto-env through the sdkman_auto_env config
# Add key=value pairs of SDKs to use below
java=21.0.3-tem
quarkus=3.11.1
gradle=8.6
17 changes: 16 additions & 1 deletion docker-bake.hcl
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ group "default" {
"flow-worker",
"flow-api",
"flow-snapshot-worker",
"peerdb-ui"
"peerdb-ui",
"flow-jvm",
]
}

Expand Down Expand Up @@ -90,3 +91,17 @@ target "peerdb-ui" {
"${REGISTRY}/peerdb-ui:${SHA_SHORT}",
]
}

# Image for the JVM flow service: builds the "runner" stage of stacks/jvm.Dockerfile
# from the repository root for amd64 and arm64, tagged with both ${TAG} and ${SHA_SHORT}.
target "flow-jvm" {
  context    = "."
  dockerfile = "stacks/jvm.Dockerfile"
  target     = "runner"
  platforms  = ["linux/amd64", "linux/arm64"]
  tags = [
    "${REGISTRY}/flow-jvm:${TAG}",
    "${REGISTRY}/flow-jvm:${SHA_SHORT}",
  ]
}
13 changes: 13 additions & 0 deletions flow-jvm/.dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#*
#!build/*-runner
#!build/*-runner.jar
#!build/lib/*
#!build/quarkus-app/*
build
*.jar
.idea
.gradle
src/main/docker
.dockerignore
.git
.sdkmanrc
3 changes: 3 additions & 0 deletions flow-jvm/.gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
gradlew linguist-generated=true
gradlew.bat linguist-generated=true

41 changes: 41 additions & 0 deletions flow-jvm/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Gradle
.gradle/
build/

# Eclipse
.project
.classpath
.settings/
bin/

# IntelliJ
.idea
*.ipr
*.iml
*.iws

# NetBeans
nb-configuration.xml

# Visual Studio Code
.vscode
.factorypath

# OSX
.DS_Store

# Vim
*.swp
*.swo

# patch
*.orig
*.rej

# Local environment
.env

# Plugin directory
/.quarkus/cli/plugins/

*.jar
1 change: 1 addition & 0 deletions flow-jvm/.sdkmanrc
73 changes: 73 additions & 0 deletions flow-jvm/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
# flow-jvm


## Dependencies
Install [SDKMAN!](https://sdkman.io/) and run `sdk env install` to set up the toolchain pinned in `.sdkmanrc`, then run `gradle wrapper` to generate the Gradle wrapper.



This project uses Quarkus, the Supersonic Subatomic Java Framework.

If you want to learn more about Quarkus, please visit its website: https://quarkus.io/ .


## Generate the java code from proto files
```shell script
./gradlew quarkusGenerateCode
```

or, to regenerate from a clean build directory:
```shell
./gradlew clean quarkusGenerateCode
```

## Running the application in dev mode (`quarkusGenerateCode` can be skipped)

You can run your application in dev mode that enables live coding using:
```shell script
./gradlew quarkusDev
```

> **_NOTE:_** Quarkus now ships with a Dev UI, which is available in dev mode only at http://localhost:9801/q/dev/.

## Packaging and running the application

The application can be packaged using:
```shell script
./gradlew build
```
It produces the `quarkus-run.jar` file in the `build/quarkus-app/` directory.
Be aware that it’s not an _über-jar_ as the dependencies are copied into the `build/quarkus-app/lib/` directory.
iamKunalGupta marked this conversation as resolved.
Show resolved Hide resolved

The application is now runnable using `java -jar build/quarkus-app/quarkus-run.jar`.

If you want to build an _über-jar_, execute the following command:
```shell script
./gradlew build -Dquarkus.package.jar.type=uber-jar
```

The application, packaged as an _über-jar_, is now runnable using `java -jar build/*-runner.jar`.

## Creating a native executable

You can create a native executable using:
```shell script
./gradlew build -Dquarkus.native.enabled=true
```

Or, if you don't have GraalVM installed, you can run the native executable build in a container using:
```shell script
./gradlew build -Dquarkus.native.enabled=true -Dquarkus.native.container-build=true
```

You can then execute your native executable with: `./build/flow-jvm-0.0.1-SNAPSHOT-runner`

If you want to learn more about building native executables, please consult https://quarkus.io/guides/gradle-tooling.

## Provided Code

### REST

Easily start your REST Web Services

[Related guide section...](https://quarkus.io/guides/getting-started-reactive#reactive-jax-rs-resources)
113 changes: 113 additions & 0 deletions flow-jvm/build.gradle
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
// Build plugins: plain Java compilation plus the Quarkus Gradle plugin.
// NOTE(review): no plugin version is given here; presumably it is resolved from
// quarkusPluginVersion in gradle.properties via settings.gradle — confirm.
plugins {
    id('java')
    id('io.quarkus')
}

// Artifact repositories, consulted in declaration order:
// Maven Central first, then the local Maven repository.
repositories {
mavenCentral()
mavenLocal()
}

// Single version shared by every org.apache.iceberg artifact declared in `dependencies`.
ext.icebergLibVersion = '1.5.2'


// Dependencies of the flow-jvm service: Quarkus extensions, logging bridges, Apache Iceberg
// (core plus AWS, GCP and Hive metastore integrations), Avro, Hadoop and the test libraries.
dependencies {
implementation 'io.quarkus:quarkus-config-yaml'
// Quarkus BOM imported as an enforced platform; its coordinates come from gradle.properties.
// The io.quarkus:* extensions in this block are declared without explicit versions.
implementation enforcedPlatform("${quarkusPlatformGroupId}:${quarkusPlatformArtifactId}:${quarkusPlatformVersion}")
implementation 'io.quarkus:quarkus-arc'
implementation 'io.quarkus:quarkus-grpc'
implementation 'io.quarkus:quarkus-rest'
implementation 'io.quarkus:quarkus-logging-json'

// Logging adapters for dependencies; these also prevent duplicate slf4j binding warnings.
// See https://quarkus.io/guides/logging#logging-apis
implementation("org.jboss.logging:commons-logging-jboss-logging")
implementation("org.jboss.logmanager:log4j-jboss-logmanager")
implementation("org.jboss.logmanager:log4j2-jboss-logmanager")
implementation("org.jboss.slf4j:slf4j-jboss-logmanager")

// Iceberg core libraries; all share icebergLibVersion from the `ext` block.
implementation "org.apache.iceberg:iceberg-core:${icebergLibVersion}"
implementation "org.apache.iceberg:iceberg-common:${icebergLibVersion}"
implementation "org.apache.iceberg:iceberg-data:${icebergLibVersion}"
implementation "org.apache.iceberg:iceberg-parquet:${icebergLibVersion}"

// These versions are forced due to version conflicts
implementation 'io.grpc:grpc-protobuf:1.63.0'
implementation 'com.google.protobuf:protobuf-java:4.27.1'

implementation 'org.apache.avro:avro:1.11.3'

implementation 'org.apache.hadoop:hadoop-client:3.4.0'
implementation 'org.apache.hadoop:hadoop-common:3.4.0'

// Drivers for JDBC Catalogs
runtimeOnly 'org.postgresql:postgresql:42.7.3'

// AWS Dependencies
implementation "org.apache.iceberg:iceberg-aws:${icebergLibVersion}"
runtimeOnly "org.apache.iceberg:iceberg-aws-bundle:${icebergLibVersion}"
// runtimeOnly 'software.amazon.awssdk:bundle:2.25.60'

// Hive 4 support is disabled; the coordinates are kept below for reference.
// // HIVE4 Dependencies
// implementation 'org.apache.hive:hive-iceberg-catalog:4.0.0'
// // DO NOT USE THE BELOW DEPENDENCIES https://github.com/apache/iceberg/issues/10429
//// implementation "org.apache.iceberg:iceberg-hive-metastore:${icebergLibVersion}"
//// runtimeOnly "org.apache.hive:hive-metastore:4.0.0"

// HIVE 3 Dependencies
implementation "org.apache.iceberg:iceberg-hive-metastore:${icebergLibVersion}"
runtimeOnly "org.apache.hive:hive-metastore:3.1.3"




// GCP Dependencies
implementation "org.apache.iceberg:iceberg-gcp:${icebergLibVersion}"
// NOTE: this bundle is currently causing gRPC version-mismatch issues
runtimeOnly "org.apache.iceberg:iceberg-gcp-bundle:${icebergLibVersion}"


// Test-only libraries.
testImplementation 'io.quarkus:quarkus-junit5'
testImplementation 'io.rest-assured:rest-assured'

}


// Exclude alternative logging bindings from every configuration.
// NOTE(review): presumably this leaves the JBoss LogManager adapters declared in
// `dependencies` as the only bindings on the classpath — confirm.
configurations.configureEach { configuration ->
    [
        // The exclusion of ch.qos.logback:logback-core is currently disabled:
        // ['ch.qos.logback', 'logback-core'],
        ['ch.qos.logback', 'logback-classic'],
        ['org.apache.logging.log4j', 'log4j-slf4j-impl'],
        ['org.slf4j', 'slf4j-reload4j'],
    ].each { groupId, moduleId ->
        configuration.exclude(group: groupId, module: moduleId)
    }
}


// Project coordinates. The README's native-runner file name
// (flow-jvm-0.0.1-SNAPSHOT-runner) contains this version string.
group = 'io.peerdb'
version = '0.0.1-SNAPSHOT'

// Compile for and target Java 21.
java {
    def jdkLevel = JavaVersion.VERSION_21
    sourceCompatibility = jdkLevel
    targetCompatibility = jdkLevel
}

// Set java.util.logging.manager to the JBoss LogManager for the test JVM.
test {
    systemProperty('java.util.logging.manager', 'org.jboss.logmanager.LogManager')
}
// Main sources: compile as UTF-8 and pass `-parameters` to javac.
compileJava {
    def javacOptions = options
    javacOptions.encoding = 'UTF-8'
    javacOptions.compilerArgs.add('-parameters')
}

// Test sources are compiled as UTF-8 as well.
compileTestJava {
    options.setEncoding('UTF-8')
}


// Quarkus build-time properties for gRPC code generation.
quarkus {
    // The .proto files are read from the repository-level `protos` directory, one level above this project.
    def sharedProtoDir = "${project.projectDir}/../protos"
    quarkusBuildProperties.put("quarkus.grpc.codegen.proto-directory", sharedProtoDir)
    // NOTE(review): the exact effect of this exclude filter is not visible from this file — confirm against the Quarkus gRPC codegen docs.
    quarkusBuildProperties.put("quarkus.grpc.codegen.exclude-filter", ".*")
}
7 changes: 7 additions & 0 deletions flow-jvm/gradle.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#Gradle properties
#Fri May 24 04:59:17 IST 2024
quarkusPlatformArtifactId=quarkus-bom
quarkusPlatformGroupId=io.quarkus.platform
quarkusPlatformVersion=3.11.1
quarkusPluginId=io.quarkus
quarkusPluginVersion=3.11.1
7 changes: 7 additions & 0 deletions flow-jvm/gradle/wrapper/gradle-wrapper.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-8.6-bin.zip
networkTimeout=10000
validateDistributionUrl=true
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists
Loading
Loading