Skip to content

Commit

Permalink
Expand support for CUDA 11, 12, MacOS, and ROCm (#3)
Browse files Browse the repository at this point in the history
The ML Lib Builder now compiles libtensorflow, libtorch, and onnxruntime
for CUDA 11, CUDA 12, MacOS, and ROCm 5.7. These builds have been tested
on an internal HPE machine and are intended for use with SmartSim.
  • Loading branch information
ashao authored Sep 16, 2024
1 parent 2ac10fa commit 2028721
Show file tree
Hide file tree
Showing 28 changed files with 1,571 additions and 36 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ install
*.tgz
*.zip
*.tar.gz
slurm*.out
8 changes: 8 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
[submodule "pytorch"]
path = pytorch
url = https://github.com/pytorch/pytorch.git
shallow = true
[submodule "tensorflow"]
path = tensorflow
url = https://github.com/tensorflow/tensorflow.git
shallow = true
[submodule "onnxruntime"]
path = onnxruntime
url = https://github.com/microsoft/onnxruntime.git
124 changes: 89 additions & 35 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -24,26 +24,32 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

PYTORCH_VERSION=2.0.1
OSX_ARCHITECTURE=arm64

TORCH_TARGET = libtorch-macos-$(OSX_ARCHITECTURE)-$(PYTORCH_VERSION).zip
TORCH_BUILD = $(PWD)/build/libtorch
TORCH_INSTALL = $(PWD)/install/libtorch
# The platform description is pulled in from the file named by ARCH_FILE.
# Parsing stops with an error when ARCH_FILE is empty or unset.
ifneq ($(ARCH_FILE),)
include $(ARCH_FILE)
else
$(error Must specify ARCH_FILE)
endif

# Root locations for installed artifacts and for out-of-tree build trees.
# $(CURDIR) is set by make itself to the directory it is running in, so these
# paths stay correct when make is started with `-C <dir>` or when the inherited
# PWD environment variable is unset or stale. The recipes below use paths
# relative to that same directory (e.g. `cd pytorch`).
INSTALL_DIR = $(CURDIR)/install
BUILD_DIR = $(CURDIR)/build

# libtorch outputs: the archive (named after version, OS, architecture and
# stack), the CMake build tree, and the install prefix that is packed into the
# archive.
TORCH_ARCHIVE = $(INSTALL_DIR)/libtorch-$(PYTORCH_VERSION)-$(OS)-$(ARCHITECTURE)-$(STACK).tgz
TORCH_BUILD_DIR = $(BUILD_DIR)/libtorch
TORCH_INSTALL_DIR = $(INSTALL_DIR)/libtorch

TORCH_CMAKE_OPTIONS =
TORCH_CMAKE_OPTIONS += -DCMAKE_OSX_ARCHITECTURES=$(OSX_ARCHITECTURE)
TORCH_CMAKE_OPTIONS += -DUSE_MKL=OFF -DUSE_MKLDNN=OFF -DUSE_ITT=OFF
TORCH_CMAKE_OPTIONS += -DUSE_QNNPACK=OFF -DUSE_KINETO=OFF
# libtensorflow outputs: the archive (named after version, OS, architecture and
# stack) and the directory the bazel-built package is unpacked into.
TF_ARCHIVE = $(INSTALL_DIR)/libtensorflow-$(TF_VERSION)-$(OS)-$(ARCHITECTURE)-$(STACK).tgz
TF_INSTALL_DIR = $(INSTALL_DIR)/libtensorflow
# Note: TensorFlow is built with bazel inside its own source tree, so no
# separate build directory is defined for it here.

# ONNX Runtime outputs: the archive (named after version, OS, architecture and
# stack), the build tree handed to its build script, and the install prefix
# that is packed into the archive.
ONNXRT_ARCHIVE = $(INSTALL_DIR)/onnxruntime-$(ONNXRT_VERSION)-$(OS)-$(ARCHITECTURE)-$(STACK).tgz
ONNXRT_BUILD_DIR = $(BUILD_DIR)/onnxruntime
ONNXRT_INSTALL_DIR = $(INSTALL_DIR)/onnxruntime

# Print the usage text assembled from the help-tagged comment lines in this
# file. ARCH_FILE is included above and may define targets of its own, so the
# default goal is pinned explicitly; otherwise a bare `make` would run
# whichever target the included file happens to define first.
.DEFAULT_GOAL := help
.PHONY: help
help:
	@grep "^# help\:" Makefile | grep -v grep | sed 's/\# help\: //' | sed 's/\# help\://'

ifneq ($(shell uname), Darwin)
$(error This tool requires Mac OSX)
endif

# help:
# help: ----Overview----
# help: This makefile can be used to build ML backends for use on arm64. Generally
Expand All @@ -55,35 +61,83 @@ endif
# help: ----Meta targets----
# help: clean -- Cleans all build and install directories
.PHONY: clean
clean: clean_torch
clean: clean_torch clean_tensorflow clean_onnxruntime

# help:
# help: ----Build Targets----
# help: torch -- Builds libtorch
# help:
.PHONY: torch
torch: $(TORCH_TARGET)

# Checkout a specific version of Torch and update all of the torch submodules
.PHONY: checkout_torch
checkout_torch:
cd pytorch && git checkout v$(PYTORCH_VERSION) && \
git submodule foreach --recursive git reset --hard && \
git submodule update --init --recursive

$(TORCH_BUILD) $(TORCH_INSTALL):
## Torch section
# Create the out-of-tree CMake build directory for libtorch on demand.
$(TORCH_BUILD_DIR):
	mkdir -p $(TORCH_BUILD_DIR)

.PHONY: build_torch
build_torch: $(TORCH_BUILD) $(TORCH_INSTALL) checkout_torch
cd $< && \
cmake -DCMAKE_INSTALL_PREFIX=$(TORCH_INSTALL) $(TORCH_CMAKE_OPTIONS) ../../pytorch && \
make install -j 6
# Pack the installed libtorch tree into the release archive once the compile
# step has finished. The archive path is absolute, so tar can change into the
# install root itself via -C.
$(TORCH_ARCHIVE): $(TORCH_ARCHIVE_MODS) compile_torch
	tar -czf $@ -C $(INSTALL_DIR) libtorch/

$(TORCH_TARGET): build_torch
cd install && zip -r ../$@ libtorch
# User-facing alias: requesting build_torch builds the libtorch archive.
# help: build_torch -- Builds libtorch
.PHONY: build_torch
build_torch: $(TORCH_ARCHIVE)

.PHONY: clean_torch
clean_torch:
rm -rf $(TORCH_BUILD) $(TORCH_TARGET) $(TORCH_INSTALL)
rm -rf $(TORCH_BUILD_DIR) $(TORCH_ARCHIVE) $(TORCH_INSTALL_DIR)
cd pytorch && git clean -fdx && git restore .
cd pytorch/third_party/kineto && git restore .

# Configure libtorch with CMake (Ninja generator) inside the build directory
# and install it into $(TORCH_INSTALL_DIR). The build directory is the first
# prerequisite, so $< names it. Any prebuild targets supplied by the
# architecture file run before the recipe.
.PHONY: compile_torch
compile_torch: $(TORCH_BUILD_DIR) $(PYTORCH_PREBUILD_TARGETS)
	cd $< && \
		cmake -GNinja \
			-DCMAKE_INSTALL_PREFIX=$(TORCH_INSTALL_DIR) \
			-DPYTHON_EXECUTABLE=$$(which python) \
			$(TORCH_CMAKE_OPTIONS) \
			../../pytorch && \
		ninja install

# Remove the TensorFlow install tree and its packaged archive, then return the
# tensorflow checkout to a pristine state (bazel outputs expunged, tracked
# files restored). The archive is removed as well so that this target matches
# clean_torch, which also deletes its archive.
.PHONY: clean_tensorflow
clean_tensorflow:
	rm -rf $(TF_INSTALL_DIR) $(TF_ARCHIVE)
	cd tensorflow && \
		bazel clean --expunge_async && \
		git restore .

# Remove the ONNX Runtime install tree, build tree and packaged archive, then
# return the onnxruntime checkout to a pristine state (tracked files reset,
# untracked and ignored files deleted). The archive is removed as well so that
# this target matches clean_torch, which also deletes its archive.
.PHONY: clean_onnxruntime
clean_onnxruntime:
	rm -rf $(ONNXRT_INSTALL_DIR) $(ONNXRT_BUILD_DIR) $(ONNXRT_ARCHIVE)
	cd onnxruntime && \
		git reset --hard && \
		git clean -fdx && \
		git restore .

## Tensorflow section
# Directory the bazel-built libtensorflow package is unpacked into.
$(TF_INSTALL_DIR):
	mkdir -p $@

# Build the libtensorflow package with bazel, unpack it into the install
# directory and re-pack it as the release archive.
# The install directory is an order-only prerequisite (after the `|`): it only
# has to exist, and its timestamp, which changes whenever its contents change,
# must not take part in deciding whether the archive is out of date.
$(TF_ARCHIVE): $(TF_PREBUILD_TARGETS) | $(TF_INSTALL_DIR)
	cd tensorflow && \
		bazel build $(TF_BAZEL_OPTS) //tensorflow/tools/lib_package:libtensorflow
	cp tensorflow/bazel-bin/tensorflow/tools/lib_package/libtensorflow.tar.gz $(TF_INSTALL_DIR)
	cd $(TF_INSTALL_DIR) && tar -xzf libtensorflow.tar.gz && rm -f libtensorflow.tar.gz
	cd $(INSTALL_DIR) && tar -czf $@ libtensorflow

# help: build_tensorflow -- Builds Tensorflow
.PHONY: build_tensorflow
build_tensorflow: $(TF_ARCHIVE)

## ONNX Runtime
# Apply the install patch to the onnxruntime checkout, then drive the project's
# own build script for a Release shared-library build without tests.
# Architecture-specific flags come from ONNXRT_OPTIONS.
# Declared phony, like compile_torch, because it never creates a file named
# after itself.
.PHONY: compile_onnxruntime
compile_onnxruntime: $(ONNXRT_PREBUILD_TARGETS)
	cd onnxruntime && \
		git apply ../patches/onnxruntime/build.install.patch
	cd onnxruntime && python tools/ci_build/build.py \
		--config Release \
		--build_dir=$(ONNXRT_BUILD_DIR) \
		--compile_no_warning_as_error \
		--parallel \
		--skip_tests \
		--install_dir=$(ONNXRT_INSTALL_DIR) \
		--build_shared_lib \
		$(ONNXRT_OPTIONS)

# Install the compiled ONNX Runtime, flatten its header directory, rename
# lib64 to lib, and pack the install tree into the release archive.
# The nested build is invoked through $(MAKE) rather than a literal `make` so
# that flags such as -j and -n are passed on to it.
$(ONNXRT_ARCHIVE): compile_onnxruntime
	cd $(ONNXRT_BUILD_DIR)/Release && $(MAKE) install
	cd $(ONNXRT_INSTALL_DIR) && mv include/onnxruntime/* include && rm -rf include/onnxruntime && mv lib64 lib
	cd $(INSTALL_DIR) && tar -czf $@ onnxruntime/

# help: build_onnxruntime -- Builds ONNX Runtime
.PHONY: build_onnxruntime
build_onnxruntime: $(ONNXRT_ARCHIVE)
89 changes: 89 additions & 0 deletions architectures/linux-cuda-11.8.0
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@

# BSD 2-Clause License
#
# Copyright (c) 2024, Hewlett Packard Enterprise
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Platform identifiers for this configuration. STACK is derived from the CUDA
# version, so changing CUDA_VERSION also changes STACK.
OS = linux
ARCHITECTURE = x64
CUDA_VERSION = 11.8.0
STACK=cuda-$(CUDA_VERSION)

# pyTorch options
PYTORCH_VERSION = 2.4.0
# CMake switches for a CUDA + cuDNN + NCCL build without Python bindings or
# ROCm. CUDNN_LIBRARY and CUDNN_INCLUDE_DIR are not defined in this file.
TORCH_CMAKE_OPTIONS = -DBUILD_PYTHON=OFF \
    -DUSE_NCCL=ON \
    -DUSE_CUDA=ON \
    -DUSE_STATIC_MKL=ON \
    -DUSE_ROCM=OFF \
    -DUSE_CUDNN=ON \
    -DTORCH_CUDA_ARCH_LIST="All" \
    -DCUDNN_LIBRARY_PATH=${CUDNN_LIBRARY} \
    -DCUDNN_INCLUDE_PATH=${CUDNN_INCLUDE_DIR}
# Targets to run before libtorch is configured.
PYTORCH_PREBUILD_TARGETS = pytorch_checkout

# Tensorflow options
TF_VERSION = 2.14.1
# Git tag and remote that tf_checkout fetches.
TF_TAG = v$(TF_VERSION)
TF_REMOTE = https://github.com/tensorflow/tensorflow.git
# Targets to run before the bazel build.
TF_PREBUILD_TARGETS = tf_checkout tf_prebuild
# Number of parallel bazel jobs. The default keeps the previous fixed value of
# 192; override it from the command line or the environment on smaller
# machines, e.g. `make TF_BAZEL_JOBS=16 ...`.
TF_BAZEL_JOBS ?= 192
TF_BAZEL_OPTS = --jobs $(TF_BAZEL_JOBS)


# ONNX Runtime options
ONNXRT_VERSION = 1.17.3
# CUDNN_INSTALL_PATH is not defined in this file; presumably it is expected
# from the environment -- TODO confirm.
ONNXRT_OPTIONS = --use_cuda --cudnn_home=${CUDNN_INSTALL_PATH}
# Targets to run before ONNX Runtime is built.
ONNXRT_PREBUILD_TARGETS = onnxrt_checkout onnxrt_patch

# Check out the requested PyTorch release tag and bring all of its submodules
# to the matching revisions. Declared phony because it is an action, not a
# file, and must run even if a file with this name exists.
.PHONY: pytorch_checkout
pytorch_checkout:
	cd pytorch && \
		git checkout v${PYTORCH_VERSION} && \
		git submodule update --init --recursive

# ROCm source preparation: run PyTorch's AMD build script, rewrite
# attr.memoryType to attr.type in HIPHooks.cpp, and apply the caffe2 ROCm path
# patch. It is not listed in PYTORCH_PREBUILD_TARGETS in this file.
# Commands are chained with && so nothing runs in the wrong directory if the
# `cd` fails.
.PHONY: pytorch_rocm_prebuild
pytorch_rocm_prebuild:
	cd pytorch && python tools/amd_build/build_amd.py
	sed -i 's/attr.memoryType/attr.type/g' pytorch/aten/src/ATen/hip/detail/HIPHooks.cpp
	cd pytorch && git apply ../patches/pytorch/caffe2_rocm_path.patch

# Run TensorFlow's configure script non-interactively; the answers are supplied
# through the environment variables set on the command (CUDA on, ROCm and
# TensorRT off).
# tf_checkout is a prerequisite so that configuration never runs while the
# source tree is still being switched to the requested tag under `make -j`.
# The `cd` is chained with && so configure.py is not started from the wrong
# directory if the `cd` fails.
.PHONY: tf_prebuild
tf_prebuild: tf_checkout
	cd tensorflow && \
		USE_DEFAULT_PYTHON_LIB_PATH=1 \
		PYTHON_BIN_PATH=$$(which python) \
		TF_NEED_CLANG=1 \
		TF_NEED_ROCM=0 \
		TF_NEED_CUDA=1 \
		TF_NEED_TENSORRT=0 \
		TF_CUDA_CLANG=0 \
		CC_OPT_FLAGS="-Wno-sign-compare" \
		TF_SET_ANDROID_WORKSPACE=0 \
		python configure.py

# Fetch the requested TensorFlow tag from TF_REMOTE and check out the fetched
# commit. The `cd` is chained with && so the git commands never run against the
# enclosing repository if the `cd` fails.
.PHONY: tf_checkout
tf_checkout:
	cd tensorflow && \
		git fetch $(TF_REMOTE) $(TF_TAG) && \
		git checkout FETCH_HEAD

# Put the onnxruntime checkout on the requested release tag in a pristine
# state. Tracked files are reset BEFORE switching tags: patches applied by an
# earlier build leave the tree modified, and `git checkout` refuses to switch
# when that would overwrite local changes. Untracked and ignored files are
# removed after the switch, then submodules are synced.
.PHONY: onnxrt_checkout
onnxrt_checkout:
	cd onnxruntime && \
		git reset --hard && \
		git checkout v$(ONNXRT_VERSION) && \
		git clean -xdf && \
		git submodule update --init --recursive

# Apply the cfloat patch to the onnxruntime sources.
# onnxrt_checkout is a prerequisite so that the patch is always applied to the
# freshly reset tree, including under `make -j`, where the two targets would
# otherwise be free to run concurrently.
.PHONY: onnxrt_patch
onnxrt_patch: onnxrt_checkout
	cd onnxruntime && \
		git apply ../patches/onnxruntime/cfloat.patch
89 changes: 89 additions & 0 deletions architectures/linux-cuda-12.5.0
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@

# BSD 2-Clause License
#
# Copyright (c) 2024, Hewlett Packard Enterprise
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Platform identifiers for this configuration. STACK is derived from the CUDA
# version, so changing CUDA_VERSION also changes STACK.
OS = linux
ARCHITECTURE = x64
CUDA_VERSION = 12.5.0
STACK=cuda-$(CUDA_VERSION)

# pyTorch options
PYTORCH_VERSION = 2.4.0
# CMake switches for a CUDA + cuDNN + NCCL build without Python bindings or
# ROCm. TORCH_CUDA_ARCH_LIST, CUDNN_LIBRARY and CUDNN_INCLUDE_DIR are not
# defined in this file.
TORCH_CMAKE_OPTIONS = -DBUILD_PYTHON=OFF \
    -DUSE_NCCL=ON \
    -DUSE_CUDA=ON \
    -DUSE_STATIC_MKL=ON \
    -DUSE_ROCM=OFF \
    -DUSE_CUDNN=ON \
    -DTORCH_CUDA_ARCH_LIST=${TORCH_CUDA_ARCH_LIST} \
    -DCUDNN_LIBRARY_PATH=${CUDNN_LIBRARY} \
    -DCUDNN_INCLUDE_PATH=${CUDNN_INCLUDE_DIR}
# Targets to run before libtorch is configured.
PYTORCH_PREBUILD_TARGETS = pytorch_checkout

# Tensorflow options
TF_VERSION = 2.17.0
# Git tag and remote that tf_checkout fetches.
TF_TAG = v$(TF_VERSION)
TF_REMOTE = https://github.com/tensorflow/tensorflow.git
# Targets to run before the bazel build.
TF_PREBUILD_TARGETS = tf_checkout tf_prebuild
# Number of parallel bazel jobs. The default keeps the previous fixed value of
# 192; override it from the command line or the environment on smaller
# machines, e.g. `make TF_BAZEL_JOBS=16 ...`.
TF_BAZEL_JOBS ?= 192
TF_BAZEL_OPTS = --jobs $(TF_BAZEL_JOBS)


# ONNX Runtime options
ONNXRT_VERSION = 1.17.3
# CUDNN_INSTALL_PATH is not defined in this file; presumably it is expected
# from the environment -- TODO confirm.
ONNXRT_OPTIONS = --use_cuda --cudnn_home=${CUDNN_INSTALL_PATH}
# Targets to run before ONNX Runtime is built.
ONNXRT_PREBUILD_TARGETS = onnxrt_checkout onnxrt_patch

# Check out the requested PyTorch release tag and bring all of its submodules
# to the matching revisions. Declared phony because it is an action, not a
# file, and must run even if a file with this name exists.
.PHONY: pytorch_checkout
pytorch_checkout:
	cd pytorch && \
		git checkout v${PYTORCH_VERSION} && \
		git submodule update --init --recursive

# ROCm source preparation: run PyTorch's AMD build script, rewrite
# attr.memoryType to attr.type in HIPHooks.cpp, and apply the caffe2 ROCm path
# patch. It is not listed in PYTORCH_PREBUILD_TARGETS in this file.
# Commands are chained with && so nothing runs in the wrong directory if the
# `cd` fails.
.PHONY: pytorch_rocm_prebuild
pytorch_rocm_prebuild:
	cd pytorch && python tools/amd_build/build_amd.py
	sed -i 's/attr.memoryType/attr.type/g' pytorch/aten/src/ATen/hip/detail/HIPHooks.cpp
	cd pytorch && git apply ../patches/pytorch/caffe2_rocm_path.patch

# Run TensorFlow's configure script non-interactively; the answers are supplied
# through the environment variables set on the command (CUDA on, ROCm and
# TensorRT off).
# tf_checkout is a prerequisite so that configuration never runs while the
# source tree is still being switched to the requested tag under `make -j`.
# The `cd` is chained with && so configure.py is not started from the wrong
# directory if the `cd` fails.
.PHONY: tf_prebuild
tf_prebuild: tf_checkout
	cd tensorflow && \
		USE_DEFAULT_PYTHON_LIB_PATH=1 \
		PYTHON_BIN_PATH=$$(which python) \
		TF_NEED_CLANG=1 \
		TF_NEED_ROCM=0 \
		TF_NEED_CUDA=1 \
		TF_NEED_TENSORRT=0 \
		TF_CUDA_CLANG=0 \
		CC_OPT_FLAGS="-Wno-sign-compare" \
		TF_SET_ANDROID_WORKSPACE=0 \
		python configure.py

# Fetch the requested TensorFlow tag from TF_REMOTE and check out the fetched
# commit. The `cd` is chained with && so the git commands never run against the
# enclosing repository if the `cd` fails.
.PHONY: tf_checkout
tf_checkout:
	cd tensorflow && \
		git fetch $(TF_REMOTE) $(TF_TAG) && \
		git checkout FETCH_HEAD

# Put the onnxruntime checkout on the requested release tag in a pristine
# state. Tracked files are reset BEFORE switching tags: patches applied by an
# earlier build leave the tree modified, and `git checkout` refuses to switch
# when that would overwrite local changes. Untracked and ignored files are
# removed after the switch, then submodules are synced.
.PHONY: onnxrt_checkout
onnxrt_checkout:
	cd onnxruntime && \
		git reset --hard && \
		git checkout v$(ONNXRT_VERSION) && \
		git clean -xdf && \
		git submodule update --init --recursive

# Apply the cfloat patch to the onnxruntime sources.
# onnxrt_checkout is a prerequisite so that the patch is always applied to the
# freshly reset tree, including under `make -j`, where the two targets would
# otherwise be free to run concurrently.
.PHONY: onnxrt_patch
onnxrt_patch: onnxrt_checkout
	cd onnxruntime && \
		git apply ../patches/onnxruntime/cfloat.patch
Loading

0 comments on commit 2028721

Please sign in to comment.