From a514a6065c692f26a7f763125d6726f3f2364dc3 Mon Sep 17 00:00:00 2001 From: Pengfei Xuan Date: Fri, 23 Feb 2024 18:10:57 -0500 Subject: [PATCH 1/3] Fix libtorch 2.2 build error --- .github/workflows/cuda/Linux.sh | 9 +++++---- .github/workflows/ubuntu.yml | 18 ++++++++++++++---- model.cpp | 10 +++++----- 3 files changed, 24 insertions(+), 13 deletions(-) diff --git a/.github/workflows/cuda/Linux.sh b/.github/workflows/cuda/Linux.sh index e9106dd..8777f41 100644 --- a/.github/workflows/cuda/Linux.sh +++ b/.github/workflows/cuda/Linux.sh @@ -8,8 +8,8 @@ case ${1} in cu121) CUDA=12.1 APT_KEY=${OS}-${CUDA/./-}-local - FILENAME=cuda-repo-${APT_KEY}_${CUDA}.0-530.30.02-1_amd64.deb - URL=https://developer.download.nvidia.com/compute/cuda/${CUDA}.0/local_installers + FILENAME=cuda-repo-${APT_KEY}_${CUDA}.1-530.30.02-1_amd64.deb + URL=https://developer.download.nvidia.com/compute/cuda/${CUDA}.1/local_installers ;; cu118) CUDA=11.8 @@ -64,7 +64,8 @@ else sudo apt-key add /var/cuda-repo-${APT_KEY}/7fa2af80.pub fi -sudo apt-get update -sudo apt-get -y install cuda +sudo apt-get -qq update +sudo apt install -y cuda-nvcc-${CUDA/./-} cuda-libraries-dev-${CUDA/./-} cuda-command-line-tools-${CUDA/./-} +sudo apt clean rm -f ${FILENAME} \ No newline at end of file diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml index e0de45a..7dfad71 100644 --- a/.github/workflows/ubuntu.yml +++ b/.github/workflows/ubuntu.yml @@ -3,7 +3,7 @@ name: OpenSplat (Ubuntu) on: push: branches: - - main + - fix-libtorch-2.2 pull_request: types: [ assigned, opened, synchronize, reopened ] release: @@ -17,9 +17,13 @@ jobs: fail-fast: false matrix: os: [ubuntu-22.04, ubuntu-20.04] # [ubuntu-22.04, ubuntu-20.04, ubuntu-18.04] - torch-version: [2.1.2] # [1.12.0, 1.13.0, 2.0.0, 2.1.0, 2.1.1, 2.1.2, 2.2.0] + torch-version: [2.1.2, 2.2.1] # [1.12.0, 1.13.0, 2.0.0, 2.1.0, 2.1.1, 2.1.2, 2.2.0, 2.2.1] cuda-version: ['cu118', 'cu121'] # ['cpu', 'cu113', 'cu116', 'cu117'] cmake-build-type: [Release] # [Debug, ClangTidy] + exclude: + - os: ubuntu-20.04 + - cuda-version: cu118 + - torch-version: 2.1.2 env: CCACHE_DIR: ${{ github.workspace }}/ccache @@ -36,12 +40,18 @@ jobs: sudo apt-get update sudo apt-get install -y \ build-essential \ - ccache \ cmake \ ninja-build \ libopencv-dev \ wget + - name: Install ccache + run: | + wget -nv https://github.com/ccache/ccache/releases/download/v4.9.1/ccache-4.9.1-linux-x86_64.tar.xz + sudo tar xf ccache-4.9.1-linux-x86_64.tar.xz -C /usr/bin --strip-components=1 --no-same-owner ccache-4.9.1-linux-x86_64/ccache + rm -f ccache-*-linux-x86_64.tar.xz + ccache --version + - name: Install CUDA ${{ matrix.cuda-version }} if: ${{ matrix.cuda-version != 'cpu' }} run: | @@ -83,7 +93,7 @@ jobs: -DCMAKE_PREFIX_PATH=${{ github.workspace }}/libtorch \ -DCMAKE_INSTALL_PREFIX=${{github.workspace}}/install \ -DCUDA_TOOLKIT_ROOT_DIR=$CUDA_HOME - ninja -k 8 + ninja - name: Clean compiler cache run: | diff --git a/model.cpp b/model.cpp index 6725c77..6ee792d 100644 --- a/model.cpp +++ b/model.cpp @@ -187,7 +187,7 @@ int Model::getDownscaleFactor(int step){ void Model::addToOptimizer(torch::optim::Adam *optimizer, const torch::Tensor &newParam, const torch::Tensor &idcs, int nSamples){ torch::Tensor param = optimizer->param_groups()[0].params()[0]; - auto pId = c10::guts::to_string(param.unsafeGetTensorImpl()); + auto pId = param.unsafeGetTensorImpl(); auto paramState = std::make_unique(static_cast(*optimizer->state()[pId])); std::vector repeats; @@ -208,21 +208,21 @@ void Model::addToOptimizer(torch::optim::Adam *optimizer, const torch::Tensor &n optimizer->state().erase(pId); - auto newPId = c10::guts::to_string(newParam.unsafeGetTensorImpl()); + auto newPId = newParam.unsafeGetTensorImpl(); optimizer->state()[newPId] = std::move(paramState); optimizer->param_groups()[0].params()[0] = newParam; } void Model::removeFromOptimizer(torch::optim::Adam *optimizer, const torch::Tensor &newParam, const torch::Tensor &deletedMask){ torch::Tensor param = optimizer->param_groups()[0].params()[0]; - auto pId = c10::guts::to_string(param.unsafeGetTensorImpl()); + auto pId = param.unsafeGetTensorImpl(); auto paramState = std::make_unique(static_cast(*optimizer->state()[pId])); paramState->exp_avg(paramState->exp_avg().index({~deletedMask})); paramState->exp_avg_sq(paramState->exp_avg_sq().index({~deletedMask})); optimizer->state().erase(pId); - auto newPId = c10::guts::to_string(newParam.unsafeGetTensorImpl()); + auto newPId = newParam.unsafeGetTensorImpl(); optimizer->param_groups()[0].params()[0] = newParam; optimizer->state()[newPId] = std::move(paramState); } @@ -383,7 +383,7 @@ void Model::afterTrain(int step){ // Reset optimizer torch::Tensor param = opacitiesOpt->param_groups()[0].params()[0]; - auto pId = c10::guts::to_string(param.unsafeGetTensorImpl()); + auto pId = param.unsafeGetTensorImpl(); auto paramState = std::make_unique(static_cast(*opacitiesOpt->state()[pId])); paramState->exp_avg(torch::zeros_like(paramState->exp_avg())); paramState->exp_avg_sq(torch::zeros_like(paramState->exp_avg_sq())); From a56fbc7b155843637c7dde37398d2472210d91d9 Mon Sep 17 00:00:00 2001 From: Pengfei Xuan Date: Fri, 23 Feb 2024 19:59:24 -0500 Subject: [PATCH 2/3] Make the build backward compatible with libtorch version < 2.2 --- .github/workflows/ubuntu.yml | 3 +-- model.cpp | 22 +++++++++++++++++++++- model.hpp | 1 + 3 files changed, 23 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml index 7dfad71..17a669b 100644 --- a/.github/workflows/ubuntu.yml +++ b/.github/workflows/ubuntu.yml @@ -21,9 +21,8 @@ jobs: cuda-version: ['cu118', 'cu121'] # ['cpu', 'cu113', 'cu116', 'cu117'] cmake-build-type: [Release] # [Debug, ClangTidy] exclude: - - os: ubuntu-20.04 - cuda-version: cu118 - - torch-version: 2.1.2 + - torch-version: 2.2.1 env: CCACHE_DIR: ${{ github.workspace }}/ccache diff --git a/model.cpp b/model.cpp index 6ee792d..c9ff302 100644 --- a/model.cpp +++ b/model.cpp @@ -187,7 +187,11 @@ int Model::getDownscaleFactor(int step){ void Model::addToOptimizer(torch::optim::Adam *optimizer, const torch::Tensor &newParam, const torch::Tensor &idcs, int nSamples){ torch::Tensor param = optimizer->param_groups()[0].params()[0]; +#if TORCH_VERSION_MAJOR == 2 && TORCH_VERSION_MINOR > 1 auto pId = param.unsafeGetTensorImpl(); +#else + auto pId = c10::guts::to_string(param.unsafeGetTensorImpl()); +#endif auto paramState = std::make_unique(static_cast(*optimizer->state()[pId])); std::vector repeats; @@ -208,21 +212,33 @@ void Model::addToOptimizer(torch::optim::Adam *optimizer, const torch::Tensor &n optimizer->state().erase(pId); +#if TORCH_VERSION_MAJOR == 2 && TORCH_VERSION_MINOR > 1 auto newPId = newParam.unsafeGetTensorImpl(); +#else + auto newPId = c10::guts::to_string(newParam.unsafeGetTensorImpl()); +#endif optimizer->state()[newPId] = std::move(paramState); optimizer->param_groups()[0].params()[0] = newParam; } void Model::removeFromOptimizer(torch::optim::Adam *optimizer, const torch::Tensor &newParam, const torch::Tensor &deletedMask){ torch::Tensor param = optimizer->param_groups()[0].params()[0]; +#if TORCH_VERSION_MAJOR == 2 && TORCH_VERSION_MINOR > 1 auto pId = param.unsafeGetTensorImpl(); +#else + auto pId = c10::guts::to_string(param.unsafeGetTensorImpl()); +#endif auto paramState = std::make_unique(static_cast(*optimizer->state()[pId])); paramState->exp_avg(paramState->exp_avg().index({~deletedMask})); paramState->exp_avg_sq(paramState->exp_avg_sq().index({~deletedMask})); optimizer->state().erase(pId); +#if TORCH_VERSION_MAJOR == 2 && TORCH_VERSION_MINOR > 1 auto newPId = newParam.unsafeGetTensorImpl(); +#else + auto newPId = c10::guts::to_string(newParam.unsafeGetTensorImpl()); +#endif optimizer->param_groups()[0].params()[0] = newParam; optimizer->state()[newPId] = std::move(paramState); } @@ -383,7 +399,11 @@ void Model::afterTrain(int step){ // Reset optimizer torch::Tensor param = opacitiesOpt->param_groups()[0].params()[0]; - auto pId = param.unsafeGetTensorImpl(); + #if TORCH_VERSION_MAJOR == 2 && TORCH_VERSION_MINOR > 1 + auto pId = param.unsafeGetTensorImpl(); + #else + auto pId = c10::guts::to_string(param.unsafeGetTensorImpl()); + #endif auto paramState = std::make_unique(static_cast(*opacitiesOpt->state()[pId])); paramState->exp_avg(torch::zeros_like(paramState->exp_avg())); paramState->exp_avg_sq(torch::zeros_like(paramState->exp_avg_sq())); diff --git a/model.hpp b/model.hpp index a22c51f..29e373a 100644 --- a/model.hpp +++ b/model.hpp @@ -3,6 +3,7 @@ #include #include +#include #include "nerfstudio.hpp" #include "kdtree_tensor.hpp" #include "spherical_harmonics.hpp" From 4787145c8d9463e18ba980d0ff9e83af58598abb Mon Sep 17 00:00:00 2001 From: Pengfei Xuan Date: Fri, 23 Feb 2024 20:10:02 -0500 Subject: [PATCH 3/3] activate the build matrix across ubuntu-22.04/20.04, cuda-11.8/12.1, and libtorch 2.1.2/2.2.1 --- .github/workflows/ubuntu.yml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml index 17a669b..5668b5a 100644 --- a/.github/workflows/ubuntu.yml +++ b/.github/workflows/ubuntu.yml @@ -3,7 +3,7 @@ name: OpenSplat (Ubuntu) on: push: branches: - - fix-libtorch-2.2 + - main pull_request: types: [ assigned, opened, synchronize, reopened ] release: @@ -20,9 +20,6 @@ jobs: torch-version: [2.1.2, 2.2.1] # [1.12.0, 1.13.0, 2.0.0, 2.1.0, 2.1.1, 2.1.2, 2.2.0, 2.2.1] cuda-version: ['cu118', 'cu121'] # ['cpu', 'cu113', 'cu116', 'cu117'] cmake-build-type: [Release] # [Debug, ClangTidy] - exclude: - - cuda-version: cu118 - - torch-version: 2.2.1 env: CCACHE_DIR: ${{ github.workspace }}/ccache