Merge pull request #33 from pierotofy/colmap
COLMAP project support
pierotofy authored Mar 5, 2024
2 parents 04ed7fb + dd89a17 commit 53ee4e5
Showing 15 changed files with 527 additions and 268 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
@@ -35,7 +35,7 @@ target_include_directories(gsplat PRIVATE
set_target_properties(gsplat PROPERTIES LINKER_LANGUAGE CXX)
set_target_properties(gsplat PROPERTIES CUDA_ARCHITECTURES "70;75")

add_executable(opensplat opensplat.cpp point_io.cpp nerfstudio.cpp model.cpp kdtree_tensor.cpp spherical_harmonics.cpp cv_utils.cpp utils.cpp project_gaussians.cpp rasterize_gaussians.cpp ssim.cpp optim_scheduler.cpp)
add_executable(opensplat opensplat.cpp point_io.cpp nerfstudio.cpp model.cpp kdtree_tensor.cpp spherical_harmonics.cpp cv_utils.cpp utils.cpp project_gaussians.cpp rasterize_gaussians.cpp ssim.cpp optim_scheduler.cpp colmap.cpp input_data.cpp tensor_math.cpp)
target_include_directories(opensplat PRIVATE ${PROJECT_SOURCE_DIR}/vendor/glm)
target_link_libraries(opensplat PUBLIC ${STDPPFS_LIBRARY} cuda gsplat ${TORCH_LIBRARIES} ${OpenCV_LIBS})

9 changes: 4 additions & 5 deletions README.md
@@ -4,7 +4,7 @@ A free and open source implementation of 3D gaussian splatting written in C++, f

![OpenSplat](https://github.com/pierotofy/OpenSplat/assets/1951843/3461e0e4-e134-4d6a-8a56-d89d00258e41)

OpenSplat takes camera poses + sparse points and computes a [scene file](https://drive.google.com/file/d/1w-CBxyWNXF3omA8B_IeOsRmSJel3iwyr/view?usp=sharing) (.ply) that can be later imported for viewing, editing and rendering in other [software](https://github.com/MrNeRF/awesome-3D-gaussian-splatting?tab=readme-ov-file#open-source-implementations).
OpenSplat takes camera poses + sparse points (in [COLMAP](https://colmap.github.io/) or [nerfstudio](https://docs.nerf.studio/quickstart/custom_dataset.html) project format) and computes a [scene file](https://drive.google.com/file/d/1w-CBxyWNXF3omA8B_IeOsRmSJel3iwyr/view?usp=sharing) (.ply) that can be later imported for viewing, editing and rendering in other [software](https://github.com/MrNeRF/awesome-3D-gaussian-splatting?tab=readme-ov-file#open-source-implementations).

Commercial use allowed and encouraged under the terms of the [AGPLv3](https://www.tldrlegal.com/license/gnu-affero-general-public-license-v3-agpl-3-0). ✅

@@ -28,7 +28,9 @@ Requirements:
The software has been tested on Ubuntu 20.04 and Windows. With some changes it could run on macOS (help us by opening a PR?).

## Build Docker Image

Navigate to the root directory of the OpenSplat repo (the one containing the Dockerfile) and run the following command to build the Docker image:

```bash
docker build -t opensplat .
```
@@ -60,10 +62,7 @@ Wrote splat.ply

The output `splat.ply` can then be dragged and dropped into one of the many [viewers](https://github.com/MrNeRF/awesome-3D-gaussian-splatting?tab=readme-ov-file#viewers) such as https://playcanvas.com/viewer. You can also edit/clean up the scene using https://playcanvas.com/supersplat/editor.

To run on your own data, choose the path to an existing [nerfstudio](https://docs.nerf.studio/) project. The project must have sparse points included (random initialization is not supported, see https://github.com/pierotofy/OpenSplat/issues/7). You can generate nerfstudio projects from [COLMAP](https://github.com/colmap/colmap/) by using nerfstudio's `ns-process-data` command: https://docs.nerf.studio/quickstart/custom_dataset.html


We have plans to add support for reading COLMAP projects directly in the near future. See https://github.com/pierotofy/OpenSplat/issues/1
To run on your own data, choose the path to an existing [COLMAP](https://colmap.github.io/) or [nerfstudio](https://docs.nerf.studio/) project. The project must have sparse points included (random initialization is not supported, see https://github.com/pierotofy/OpenSplat/issues/7).
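
For a quick start on a COLMAP dataset, a minimal invocation might look like the following sketch (the `-n` iteration flag and the default `splat.ply` output name are assumptions; see the parameter list below for the authoritative options):

```bash
# Point opensplat at a COLMAP project folder (containing sparse/0/cameras.bin,
# images.bin and points3D.bin) or a nerfstudio project (transforms.json)
./opensplat /path/to/project -n 2000
```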

There are several parameters you can tune. To view the full list:

154 changes: 154 additions & 0 deletions colmap.cpp
@@ -0,0 +1,154 @@
#include <filesystem>
#include "colmap.hpp"
#include "point_io.hpp"
#include "tensor_math.hpp"

namespace fs = std::filesystem;
using namespace torch::indexing;

namespace cm{

InputData inputDataFromColmap(const std::string &projectRoot){
InputData ret;
fs::path cmRoot(projectRoot);

if (!fs::exists(cmRoot / "cameras.bin") && fs::exists(cmRoot / "sparse" / "0" / "cameras.bin")){
cmRoot = cmRoot / "sparse" / "0";
}

fs::path camerasPath = cmRoot / "cameras.bin";
fs::path imagesPath = cmRoot / "images.bin";
fs::path pointsPath = cmRoot / "points3D.bin";

if (!fs::exists(camerasPath)) throw std::runtime_error(camerasPath.string() + " does not exist");
if (!fs::exists(imagesPath)) throw std::runtime_error(imagesPath.string() + " does not exist");
if (!fs::exists(pointsPath)) throw std::runtime_error(pointsPath.string() + " does not exist");

std::ifstream camf(camerasPath.string(), std::ios::binary);
if (!camf.is_open()) throw std::runtime_error("Cannot open " + camerasPath.string());
std::ifstream imgf(imagesPath.string(), std::ios::binary);
if (!imgf.is_open()) throw std::runtime_error("Cannot open " + imagesPath.string());

size_t numCameras = readBinary<uint64_t>(camf);
std::vector<Camera> cameras(numCameras);

std::unordered_map<uint32_t, Camera *> camMap;

for (size_t i = 0; i < numCameras; i++) {
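// Each record in cameras.bin: camera_id (uint32), model_id (int32),
// width and height (uint64 each), then the model-specific parameters as doubles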
Camera *cam = &cameras[i];

cam->id = readBinary<uint32_t>(camf);

CameraModel model = static_cast<CameraModel>(readBinary<int>(camf)); // model ID
cam->width = readBinary<uint64_t>(camf);
cam->height = readBinary<uint64_t>(camf);

if (model == SimplePinhole){
cam->fx = readBinary<double>(camf);
cam->fy = cam->fx;
cam->cx = readBinary<double>(camf);
cam->cy = readBinary<double>(camf);
}else if (model == Pinhole){
cam->fx = readBinary<double>(camf);
cam->fy = readBinary<double>(camf);
cam->cx = readBinary<double>(camf);
cam->cy = readBinary<double>(camf);
}else if (model == OpenCV){
cam->fx = readBinary<double>(camf);
cam->fy = readBinary<double>(camf);
cam->cx = readBinary<double>(camf);
cam->cy = readBinary<double>(camf);
cam->k1 = readBinary<double>(camf);
cam->k2 = readBinary<double>(camf);
cam->p1 = readBinary<double>(camf);
cam->p2 = readBinary<double>(camf);
}else{
throw std::runtime_error("Unsupported camera model: " + std::to_string(model));
}

camMap[cam->id] = cam;
}

camf.close();


size_t numImages = readBinary<uint64_t>(imgf);
torch::Tensor unorientedPoses = torch::zeros({static_cast<long int>(numImages), 4, 4}, torch::kFloat32);

for (size_t i = 0; i < numImages; i++){
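// Each record in images.bin: image_id (uint32), rotation quaternion qw,qx,qy,qz (4 doubles),
// translation (3 doubles), camera_id (uint32), a null-terminated file name,
// then the list of 2D keypoint observations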
readBinary<uint32_t>(imgf); // imageId

torch::Tensor qVec = torch::tensor({
readBinary<double>(imgf),
readBinary<double>(imgf),
readBinary<double>(imgf),
readBinary<double>(imgf)
}, torch::kFloat32);
torch::Tensor R = quatToRotMat(qVec);
torch::Tensor T = torch::tensor({
{ readBinary<double>(imgf) },
{ readBinary<double>(imgf) },
{ readBinary<double>(imgf) }
}, torch::kFloat32);

torch::Tensor Rinv = R.transpose(0, 1);
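// COLMAP stores world-to-camera (R, t); the camera center in world space is -R^T * t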
torch::Tensor Tinv = torch::matmul(-Rinv, T);

uint32_t camId = readBinary<uint32_t>(imgf);

Camera cam = *camMap[camId];

char ch = '\0';
std::string filePath = "";
while(true){
imgf.read(&ch, 1);
if (ch == '\0') break;
filePath += ch;
}

// TODO: should "images" be an option?
cam.filePath = (fs::path(projectRoot) / "images" / filePath).string();

unorientedPoses[i].index_put_({Slice(None, 3), Slice(None, 3)}, Rinv);
unorientedPoses[i].index_put_({Slice(None, 3), Slice(3, 4)}, Tinv);
unorientedPoses[i][3][3] = 1.0f;

// Convert COLMAP's camera CRS (OpenCV) to OpenGL
unorientedPoses[i].index_put_({Slice(0, 3), Slice(1,3)}, unorientedPoses[i].index({Slice(0, 3), Slice(1,3)}) * -1.0f);

size_t numPoints2D = readBinary<uint64_t>(imgf);
for (size_t j = 0; j < numPoints2D; j++){
readBinary<double>(imgf); // x
readBinary<double>(imgf); // y
readBinary<uint64_t>(imgf); // point3D ID
}

ret.cameras.push_back(cam);
}

imgf.close();

auto r = autoOrientAndCenterPoses(unorientedPoses);
torch::Tensor poses = std::get<0>(r);
ret.transformMatrix = std::get<1>(r);
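// Scale the scene so every camera center lies within [-1, 1] on each axis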
ret.scaleFactor = 1.0f / torch::max(torch::abs(poses.index({Slice(), Slice(None, 3), 3}))).item<float>();
poses.index({Slice(), Slice(None, 3), 3}) *= ret.scaleFactor;

for (size_t i = 0; i < ret.cameras.size(); i++){
ret.cameras[i].camToWorld = poses[i];
}

PointSet *pSet = readPointSet(pointsPath.string());
torch::Tensor points = pSet->pointsTensor().clone();
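// Apply the same orientation/centering transform and scale to the sparse points below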

ret.points.xyz = torch::matmul(torch::cat({points, torch::ones_like(points.index({"...", Slice(None, 1)}))}, -1),
ret.transformMatrix.transpose(0, 1));
ret.points.xyz *= ret.scaleFactor;
ret.points.rgb = pSet->colorsTensor().clone();

RELEASE_POINTSET(pSet);

return ret;
}

}
17 changes: 17 additions & 0 deletions colmap.hpp
@@ -0,0 +1,17 @@
#ifndef COLMAP_H
#define COLMAP_H

#include <fstream>
#include "input_data.hpp"

namespace cm{
InputData inputDataFromColmap(const std::string &projectRoot);

enum CameraModel{
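// Values must match COLMAP's numeric camera model IDs as written in cameras.bin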
SimplePinhole = 0, Pinhole, SimpleRadial, Radial,
OpenCV, OpenCVFisheye, FullOpenCV, FOV,
SimpleRadialFisheye, RadialFisheye, ThinPrismFisheye
};
}

#endif
151 changes: 151 additions & 0 deletions input_data.cpp
@@ -0,0 +1,151 @@
#include <filesystem>
#include "input_data.hpp"
#include "cv_utils.hpp"

namespace fs = std::filesystem;
using namespace torch::indexing;

namespace ns{ InputData inputDataFromNerfStudio(const std::string &projectRoot); }
namespace cm{ InputData inputDataFromColmap(const std::string &projectRoot); }

InputData inputDataFromX(const std::string &projectRoot){
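// Pick a loader based on the project contents: transforms.json means nerfstudio,
// a sparse/ folder or cameras.bin means COLMAP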
fs::path root(projectRoot);

if (fs::exists(root / "transforms.json")){
return ns::inputDataFromNerfStudio(projectRoot);
}else if (fs::exists(root / "sparse") || fs::exists(root / "cameras.bin")){
return cm::inputDataFromColmap(projectRoot);
}else{
throw std::runtime_error("Invalid project folder (must be either a colmap or nerfstudio project folder)");
}
}

torch::Tensor Camera::getIntrinsicsMatrix(){
return torch::tensor({{fx, 0.0f, cx},
{0.0f, fy, cy},
{0.0f, 0.0f, 1.0f}}, torch::kFloat32);
}

void Camera::loadImage(float downscaleFactor){
// Populates image and K, then updates the camera parameters
// Caution: this function has destructive behaviors
// and should be called only once
if (image.numel()) throw std::runtime_error("loadImage already called");
std::cout << "Loading " << filePath << std::endl;

float scaleFactor = 1.0f / downscaleFactor;
cv::Mat cImg = imreadRGB(filePath);

float rescaleF = 1.0f;
// If camera intrinsics don't match the image dimensions
if (cImg.rows != height || cImg.cols != width){
rescaleF = static_cast<float>(cImg.rows) / static_cast<float>(height);
}
fx *= scaleFactor * rescaleF;
fy *= scaleFactor * rescaleF;
cx *= scaleFactor * rescaleF;
cy *= scaleFactor * rescaleF;

if (downscaleFactor > 1.0f){
float f = 1.0f / downscaleFactor;
cv::resize(cImg, cImg, cv::Size(), f, f, cv::INTER_AREA);
}

K = getIntrinsicsMatrix();
cv::Rect roi;

if (hasDistortionParameters()){
// Undistort
std::vector<float> distCoeffs = undistortionParameters();
cv::Mat cK = floatNxNtensorToMat(K);
cv::Mat newK = cv::getOptimalNewCameraMatrix(cK, distCoeffs, cv::Size(cImg.cols, cImg.rows), 0, cv::Size(), &roi);

cv::Mat undistorted = cv::Mat::zeros(cImg.rows, cImg.cols, cImg.type());
cv::undistort(cImg, undistorted, cK, distCoeffs, newK);

image = imageToTensor(undistorted);
K = floatNxNMatToTensor(newK);
}else{
roi = cv::Rect(0, 0, cImg.cols, cImg.rows);
image = imageToTensor(cImg);
}

// Crop to ROI
image = image.index({Slice(roi.y, roi.y + roi.height), Slice(roi.x, roi.x + roi.width), Slice()});

// Update parameters
height = image.size(0);
width = image.size(1);
fx = K[0][0].item<float>();
fy = K[1][1].item<float>();
cx = K[0][2].item<float>();
cy = K[1][2].item<float>();
}

torch::Tensor Camera::getImage(int downscaleFactor){
if (downscaleFactor <= 1) return image;
else{

// torch::jit::script::Module container = torch::jit::load("gt.pt");
// return container.attr("val").toTensor();

if (imagePyramids.find(downscaleFactor) != imagePyramids.end()){
return imagePyramids[downscaleFactor];
}

// Rescale, store and return
cv::Mat cImg = tensorToImage(image);
cv::resize(cImg, cImg, cv::Size(cImg.cols / downscaleFactor, cImg.rows / downscaleFactor), 0.0, 0.0, cv::INTER_AREA);
torch::Tensor t = imageToTensor(cImg);
imagePyramids[downscaleFactor] = t;
return t;
}
}

bool Camera::hasDistortionParameters(){
return k1 != 0.0f || k2 != 0.0f || k3 != 0.0f || p1 != 0.0f || p2 != 0.0f;
}

std::vector<float> Camera::undistortionParameters(){
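// Coefficient order expected by cv::undistort: k1, k2, p1, p2, k3, k4, k5, k6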
std::vector<float> p = { k1, k2, p1, p2, k3, 0.0f, 0.0f, 0.0f };
return p;
}

void Camera::scaleOutputResolution(float scaleFactor){
fx = fx * scaleFactor;
fy = fy * scaleFactor;
cx = cx * scaleFactor;
cy = cy * scaleFactor;
height = static_cast<int>(static_cast<float>(height) * scaleFactor);
width = static_cast<int>(static_cast<float>(width) * scaleFactor);
}

std::tuple<std::vector<Camera>, Camera *> InputData::getCameras(bool validate, const std::string &valImage){
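// When validation is enabled, hold out a single camera, chosen either at random (fixed seed) or by matching file name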
if (!validate) return std::make_tuple(cameras, nullptr);
else{
size_t valIdx = -1;
std::srand(42);

if (valImage == "random"){
valIdx = std::rand() % cameras.size();
}else{
for (size_t i = 0; i < cameras.size(); i++){
if (fs::path(cameras[i].filePath).filename().string() == valImage){
valIdx = i;
break;
}
}
if (valIdx == -1) throw std::runtime_error(valImage + " not in the list of cameras");
}

std::vector<Camera> cams;
Camera *valCam = nullptr;

for (size_t i = 0; i < cameras.size(); i++){
if (i != valIdx) cams.push_back(cameras[i]);
else valCam = &cameras[i];
}

return std::make_tuple(cams, valCam);
}
}