From 4a8935a77e8134d574737e7236b91c814fdacf6c Mon Sep 17 00:00:00 2001 From: Rafal Rudnicki Date: Tue, 3 Dec 2024 16:09:57 +0100 Subject: [PATCH] make disjoint pool a C structure --- .github/workflows/coverity.yml | 1 - .github/workflows/nightly.yml | 2 - .github/workflows/performance.yml | 1 - .github/workflows/reusable_basic.yml | 8 - .github/workflows/reusable_benchmarks.yml | 1 - .github/workflows/reusable_dax.yml | 1 - .github/workflows/reusable_fast.yml | 7 - .github/workflows/reusable_gpu.yml | 1 - .github/workflows/reusable_multi_numa.yml | 1 - .github/workflows/reusable_proxy_lib.yml | 1 - .github/workflows/reusable_sanitizers.yml | 2 - .github/workflows/reusable_valgrind.yml | 1 - CMakeLists.txt | 6 +- README.md | 7 +- benchmark/CMakeLists.txt | 7 - benchmark/multithread.cpp | 7 +- benchmark/ubench.c | 15 +- examples/CMakeLists.txt | 20 +- examples/README.md | 4 +- examples/cuda_shared_memory/CMakeLists.txt | 5 +- examples/ipc_level_zero/CMakeLists.txt | 4 +- .../level_zero_shared_memory/CMakeLists.txt | 4 +- scripts/qemu/run-build.sh | 1 - src/CMakeLists.txt | 3 +- src/libumf.def | 12 + src/libumf.map | 12 + src/pool/CMakeLists.txt | 30 - src/pool/pool_disjoint.c | 1191 +++++++++++++++ src/pool/pool_disjoint.cpp | 1313 ----------------- src/pool/pool_disjoint_internal.h | 221 +++ test/CMakeLists.txt | 86 +- test/c_api/disjoint_pool.c | 3 +- test/pools/disjoint_pool.cpp | 98 +- test/provider_os_memory.cpp | 9 +- test/test_installation.py | 7 - 35 files changed, 1592 insertions(+), 1500 deletions(-) create mode 100644 src/pool/pool_disjoint.c delete mode 100644 src/pool/pool_disjoint.cpp create mode 100644 src/pool/pool_disjoint_internal.h diff --git a/.github/workflows/coverity.yml b/.github/workflows/coverity.yml index 531a463c7..ebae6086a 100644 --- a/.github/workflows/coverity.yml +++ b/.github/workflows/coverity.yml @@ -49,7 +49,6 @@ jobs: -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=OFF -DUMF_TESTS_FAIL_ON_SKIP=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON - name: Build diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 46543fac8..d62dfff1a 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -76,7 +76,6 @@ jobs: -DCMAKE_BUILD_TYPE=Debug -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON -DUMF_BUILD_LEVEL_ZERO_PROVIDER=OFF -DUMF_BUILD_CUDA_PROVIDER=OFF @@ -156,7 +155,6 @@ jobs: # -DUMF_BUILD_SHARED_LIBRARY=${{matrix.shared_library}} ^ # -DUMF_FORMAT_CODE_STYLE=OFF ^ # -DUMF_DEVELOPER_MODE=ON ^ - # -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON ^ # -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON ^ # -DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON ^ # -DUMF_BUILD_CUDA_PROVIDER=ON ^ diff --git a/.github/workflows/performance.yml b/.github/workflows/performance.yml index 6057df5f0..d218d6d3a 100644 --- a/.github/workflows/performance.yml +++ b/.github/workflows/performance.yml @@ -77,7 +77,6 @@ jobs: -DUMF_DEVELOPER_MODE=OFF -DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON -DUMF_BUILD_CUDA_PROVIDER=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON - name: Build diff --git a/.github/workflows/reusable_basic.yml b/.github/workflows/reusable_basic.yml index 3b573453d..119ac5ed2 100644 --- a/.github/workflows/reusable_basic.yml +++ b/.github/workflows/reusable_basic.yml @@ -165,7 +165,6 @@ jobs: -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON 
-DUMF_TESTS_FAIL_ON_SKIP=ON -DUMF_DISABLE_HWLOC=${{matrix.disable_hwloc}} -DUMF_LINK_HWLOC_STATICALLY=${{matrix.link_hwloc_statically}} @@ -208,7 +207,6 @@ jobs: --build-dir ${{env.BUILD_DIR}} --install-dir ${{env.INSTL_DIR}} --build-type ${{matrix.build_type}} - --disjoint-pool ${{ matrix.install_tbb == 'ON' && matrix.disable_hwloc != 'ON' && matrix.shared_library == 'ON' && '--proxy' || '' }} --umf-version ${{env.UMF_VERSION}} ${{ matrix.shared_library == 'ON' && '--shared-library' || '' }} @@ -278,7 +276,6 @@ jobs: -DUMF_BUILD_SHARED_LIBRARY=${{matrix.shared_library}} -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON -DUMF_BUILD_LEVEL_ZERO_PROVIDER=${{matrix.level_zero_provider}} -DUMF_BUILD_CUDA_PROVIDER=${{matrix.cuda_provider}} @@ -298,7 +295,6 @@ jobs: --build-dir ${{env.BUILD_DIR}} --install-dir ${{env.INSTL_DIR}} --build-type ${{matrix.build_type}} - --disjoint-pool ${{matrix.shared_library == 'ON' && '--proxy' || '' }} --umf-version ${{env.UMF_VERSION}} ${{ matrix.shared_library == 'ON' && '--shared-library' || ''}} @@ -336,7 +332,6 @@ jobs: -DUMF_BUILD_EXAMPLES=OFF -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=OFF -DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON -DUMF_BUILD_CUDA_PROVIDER=ON @@ -379,7 +374,6 @@ jobs: -DUMF_BUILD_EXAMPLES=OFF -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=OFF -DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON -DUMF_BUILD_CUDA_PROVIDER=ON @@ -478,7 +472,6 @@ jobs: -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=ON -DUMF_BUILD_LEVEL_ZERO_PROVIDER=OFF - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON -DUMF_BUILD_SHARED_LIBRARY=ON -DUMF_TESTS_FAIL_ON_SKIP=ON @@ -492,7 +485,6 @@ jobs: --build-dir ${{env.BUILD_DIR}} --install-dir ${{env.INSTL_DIR}} --build-type ${{env.BUILD_TYPE}} - --disjoint-pool --proxy --umf-version ${{env.UMF_VERSION}} --shared-library diff --git a/.github/workflows/reusable_benchmarks.yml b/.github/workflows/reusable_benchmarks.yml index ed6a48294..9000126d5 100644 --- a/.github/workflows/reusable_benchmarks.yml +++ b/.github/workflows/reusable_benchmarks.yml @@ -64,7 +64,6 @@ jobs: -DUMF_DEVELOPER_MODE=OFF -DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON -DUMF_BUILD_CUDA_PROVIDER=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON - name: Build UMF on Linux diff --git a/.github/workflows/reusable_dax.yml b/.github/workflows/reusable_dax.yml index af15226d2..b394a0107 100644 --- a/.github/workflows/reusable_dax.yml +++ b/.github/workflows/reusable_dax.yml @@ -83,7 +83,6 @@ jobs: -DUMF_BUILD_GPU_EXAMPLES=OFF -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON -DUMF_BUILD_LEVEL_ZERO_PROVIDER=OFF -DUMF_TESTS_FAIL_ON_SKIP=ON diff --git a/.github/workflows/reusable_fast.yml b/.github/workflows/reusable_fast.yml index 5673727ac..a499e8ef7 100644 --- a/.github/workflows/reusable_fast.yml +++ b/.github/workflows/reusable_fast.yml @@ -19,24 +19,20 @@ jobs: matrix: include: - os: windows-latest - disjoint: 'OFF' build_tests: 'ON' simple_cmake: 'OFF' # pure C build (Windows) - os: windows-latest - disjoint: 'OFF' # Tests' building is off for a pure C build build_tests: 'OFF' simple_cmake: 'OFF' - os: ubuntu-latest - disjoint: 'ON' build_tests: 'ON' # Windows doesn't recognize 'CMAKE_BUILD_TYPE', it uses '--config' param in build command 
extra_build_options: '-DCMAKE_BUILD_TYPE=Release -DUMF_BUILD_BENCHMARKS=ON -DUMF_BUILD_BENCHMARKS_MT=ON' simple_cmake: 'OFF' # pure C build (Linux) - os: ubuntu-latest - disjoint: 'OFF' # Windows doesn't recognize 'CMAKE_BUILD_TYPE', it uses '--config' param in build command # Tests' building is off for a pure C build build_tests: 'OFF' @@ -44,13 +40,11 @@ jobs: simple_cmake: 'OFF' # simplest CMake on ubuntu-latest - os: ubuntu-latest - disjoint: 'OFF' build_tests: 'ON' extra_build_options: '-DCMAKE_BUILD_TYPE=Release' simple_cmake: 'ON' # simplest CMake ubuntu-20.04 - os: ubuntu-20.04 - disjoint: 'OFF' build_tests: 'ON' extra_build_options: '-DCMAKE_BUILD_TYPE=Release' simple_cmake: 'ON' @@ -101,7 +95,6 @@ jobs: -DCMAKE_PREFIX_PATH="${{env.VCPKG_PATH}}" -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=${{matrix.disjoint}} -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON -DUMF_BUILD_TESTS=${{matrix.build_tests}} -DUMF_BUILD_EXAMPLES=ON diff --git a/.github/workflows/reusable_gpu.yml b/.github/workflows/reusable_gpu.yml index 739aab9e1..5856292be 100644 --- a/.github/workflows/reusable_gpu.yml +++ b/.github/workflows/reusable_gpu.yml @@ -92,7 +92,6 @@ jobs: -DUMF_BUILD_GPU_TESTS=ON -DUMF_BUILD_GPU_EXAMPLES=ON -DUMF_DEVELOPER_MODE=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON -DUMF_BUILD_CUDA_PROVIDER=OFF -DUMF_BUILD_LEVEL_ZERO_PROVIDER=OFF diff --git a/.github/workflows/reusable_multi_numa.yml b/.github/workflows/reusable_multi_numa.yml index 2ccb2d8f3..28ab341c9 100644 --- a/.github/workflows/reusable_multi_numa.yml +++ b/.github/workflows/reusable_multi_numa.yml @@ -44,7 +44,6 @@ jobs: -DUMF_BUILD_BENCHMARKS=OFF -DUMF_BUILD_TESTS=ON -DUMF_DEVELOPER_MODE=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=${{ matrix.os == 'rhel-9.1' && 'OFF' || 'ON' }} -DUMF_TESTS_FAIL_ON_SKIP=ON ${{ matrix.build_type == 'Debug' && matrix.os == 'ubuntu-22.04' && '-DUMF_USE_COVERAGE=ON' || '' }} diff --git a/.github/workflows/reusable_proxy_lib.yml b/.github/workflows/reusable_proxy_lib.yml index e73dabe29..a381eaeb6 100644 --- a/.github/workflows/reusable_proxy_lib.yml +++ b/.github/workflows/reusable_proxy_lib.yml @@ -51,7 +51,6 @@ jobs: -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=OFF -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_TESTS_FAIL_ON_SKIP=ON -DUMF_PROXY_LIB_BASED_ON_POOL=${{matrix.proxy_lib_pool}} ${{ matrix.build_type == 'Debug' && '-DUMF_USE_COVERAGE=ON' || '' }} diff --git a/.github/workflows/reusable_sanitizers.yml b/.github/workflows/reusable_sanitizers.yml index f9e121f88..317fea664 100644 --- a/.github/workflows/reusable_sanitizers.yml +++ b/.github/workflows/reusable_sanitizers.yml @@ -59,7 +59,6 @@ jobs: -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_USE_ASAN=${{matrix.sanitizers.asan}} -DUMF_USE_UBSAN=${{matrix.sanitizers.ubsan}} -DUMF_USE_TSAN=${{matrix.sanitizers.tsan}} @@ -132,7 +131,6 @@ jobs: -DUMF_BUILD_SHARED_LIBRARY=OFF -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_USE_ASAN=${{matrix.sanitizers.asan}} -DUMF_BUILD_EXAMPLES=ON -DUMF_BUILD_LEVEL_ZERO_PROVIDER=OFF diff --git a/.github/workflows/reusable_valgrind.yml b/.github/workflows/reusable_valgrind.yml index 3e0af273a..7cc879ed3 100644 --- a/.github/workflows/reusable_valgrind.yml +++ b/.github/workflows/reusable_valgrind.yml @@ -29,7 +29,6 @@ jobs: -DCMAKE_BUILD_TYPE=Debug 
-DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON -DUMF_BUILD_LEVEL_ZERO_PROVIDER=OFF -DUMF_BUILD_CUDA_PROVIDER=OFF diff --git a/CMakeLists.txt b/CMakeLists.txt index 82381f5b5..c66eeef41 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -37,8 +37,6 @@ find_package(PkgConfig) option(UMF_BUILD_SHARED_LIBRARY "Build UMF as shared library" OFF) option(UMF_BUILD_LEVEL_ZERO_PROVIDER "Build Level Zero memory provider" ON) option(UMF_BUILD_CUDA_PROVIDER "Build CUDA memory provider" ON) -option(UMF_BUILD_LIBUMF_POOL_DISJOINT - "Build the libumf_pool_disjoint static library" OFF) option(UMF_BUILD_LIBUMF_POOL_JEMALLOC "Build the libumf_pool_jemalloc static library" OFF) option(UMF_BUILD_TESTS "Build UMF tests" ON) @@ -408,8 +406,8 @@ endif() # For using the options listed in the OPTIONS_REQUIRING_CXX variable a C++17 # compiler is required. Moreover, if these options are not set, CMake will set # up a strict C build, without C++ support. -set(OPTIONS_REQUIRING_CXX "UMF_BUILD_TESTS" "UMF_BUILD_LIBUMF_POOL_DISJOINT" - "UMF_BUILD_BENCHMARKS_MT" "UMF_BUILD_BENCHMARKS") +set(OPTIONS_REQUIRING_CXX "UMF_BUILD_TESTS" "UMF_BUILD_BENCHMARKS_MT" + "UMF_BUILD_BENCHMARKS") foreach(option_name ${OPTIONS_REQUIRING_CXX}) if(${option_name}) enable_language(CXX) diff --git a/README.md b/README.md index 0c569c1b3..a66fece2b 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,7 @@ For development and contributions: - cmake-format-0.6 (can be installed with `python -m pip install cmake-format==0.6.13`) - black (can be installed with `python -m pip install black==24.3.0`) -For building tests, multithreaded benchmarks and Disjoint Pool: +For building tests and multithreaded benchmarks: - C++ compiler with C++17 support For Level Zero memory provider tests: @@ -100,7 +100,6 @@ List of options provided by CMake: | UMF_BUILD_SHARED_LIBRARY | Build UMF as shared library | ON/OFF | OFF | | UMF_BUILD_LEVEL_ZERO_PROVIDER | Build Level Zero memory provider | ON/OFF | ON | | UMF_BUILD_CUDA_PROVIDER | Build CUDA memory provider | ON/OFF | ON | -| UMF_BUILD_LIBUMF_POOL_DISJOINT | Build the libumf_pool_disjoint static library | ON/OFF | OFF | | UMF_BUILD_LIBUMF_POOL_JEMALLOC | Build the libumf_pool_jemalloc static library | ON/OFF | OFF | | UMF_BUILD_TESTS | Build UMF tests | ON/OFF | ON | | UMF_BUILD_GPU_TESTS | Build UMF GPU tests | ON/OFF | OFF | @@ -258,10 +257,6 @@ To enable this feature, the `UMF_BUILD_SHARED_LIBRARY` option needs to be turned TODO: Add a description -##### Requirements - -To enable this feature, the `UMF_BUILD_LIBUMF_POOL_DISJOINT` option needs to be turned `ON`. 
- #### Jemalloc pool Jemalloc pool is a [jemalloc](https://github.com/jemalloc/jemalloc)-based memory diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt index efad0baf3..bf5cd9e31 100644 --- a/benchmark/CMakeLists.txt +++ b/benchmark/CMakeLists.txt @@ -87,10 +87,6 @@ function(add_umf_benchmark) "${DLL_PATH_LIST}") endif() - if(UMF_BUILD_LIBUMF_POOL_DISJOINT) - target_compile_definitions(${BENCH_NAME} - PRIVATE UMF_POOL_DISJOINT_ENABLED=1) - endif() if(UMF_POOL_JEMALLOC_ENABLED) target_compile_definitions(${BENCH_NAME} PRIVATE UMF_POOL_JEMALLOC_ENABLED=1) @@ -118,9 +114,6 @@ endfunction() set(LIB_DIRS ${LIBHWLOC_LIBRARY_DIRS}) # optional libraries -if(UMF_BUILD_LIBUMF_POOL_DISJOINT) - set(LIBS_OPTIONAL ${LIBS_OPTIONAL} disjoint_pool) -endif() if(LINUX) set(LIBS_OPTIONAL ${LIBS_OPTIONAL} m) endif() diff --git a/benchmark/multithread.cpp b/benchmark/multithread.cpp index ecc238529..fc3eaadd7 100644 --- a/benchmark/multithread.cpp +++ b/benchmark/multithread.cpp @@ -121,7 +121,7 @@ int main() { std::cout << "skipping jemalloc_pool mt_alloc_free" << std::endl; #endif -#if defined(UMF_POOL_DISJOINT_ENABLED) + // NOTE: disjoint pool is always enabled umf_disjoint_pool_params_handle_t hDisjointParams = nullptr; umf_result_t ret = umfDisjointPoolParamsCreate(&hDisjointParams); if (ret != UMF_RESULT_SUCCESS) { @@ -132,20 +132,15 @@ int main() { std::cout << "disjoint_pool mt_alloc_free: "; mt_alloc_free(poolCreateExtParams{umfDisjointPoolOps(), hDisjointParams, umfOsMemoryProviderOps(), osParams}); -#else - std::cout << "skipping disjoint_pool mt_alloc_free" << std::endl; -#endif // ctest looks for "PASSED" in the output std::cout << "PASSED" << std::endl; -#if defined(UMF_POOL_DISJOINT_ENABLED) ret = umfDisjointPoolParamsDestroy(hDisjointParams); if (ret != UMF_RESULT_SUCCESS) { std::cerr << "disjoint pool params destroy failed" << std::endl; return -1; } -#endif return 0; } diff --git a/benchmark/ubench.c b/benchmark/ubench.c index 845dc881d..ee2a51626 100644 --- a/benchmark/ubench.c +++ b/benchmark/ubench.c @@ -15,23 +15,19 @@ #include #include +#include #include #include #include #include -#ifdef UMF_POOL_DISJOINT_ENABLED -#include -#endif - #ifdef UMF_POOL_JEMALLOC_ENABLED #include #endif #include "utils_common.h" -#if (defined UMF_BUILD_LIBUMF_POOL_DISJOINT && \ - defined UMF_BUILD_LEVEL_ZERO_PROVIDER && defined UMF_BUILD_GPU_TESTS) +#if (defined UMF_BUILD_LEVEL_ZERO_PROVIDER && defined UMF_BUILD_GPU_TESTS) #include "utils_level_zero.h" #endif @@ -244,7 +240,6 @@ UBENCH_EX(simple, proxy_pool_with_os_memory_provider) { free(array); } -#if (defined UMF_POOL_DISJOINT_ENABLED) ////////////////// DISJOINT POOL WITH OS MEMORY PROVIDER UBENCH_EX(simple, disjoint_pool_with_os_memory_provider) { @@ -327,7 +322,6 @@ UBENCH_EX(simple, disjoint_pool_with_os_memory_provider) { umfMemoryProviderDestroy(os_memory_provider); free(array); } -#endif /* (defined UMF_POOL_DISJOINT_ENABLED) */ #if (defined UMF_POOL_JEMALLOC_ENABLED) ////////////////// JEMALLOC POOL WITH OS MEMORY PROVIDER @@ -421,8 +415,7 @@ UBENCH_EX(simple, scalable_pool_with_os_memory_provider) { } #endif /* (defined UMF_POOL_SCALABLE_ENABLED) */ -#if (defined UMF_POOL_DISJOINT_ENABLED && \ - defined UMF_BUILD_LEVEL_ZERO_PROVIDER && defined UMF_BUILD_GPU_TESTS) +#if (defined UMF_BUILD_LEVEL_ZERO_PROVIDER && defined UMF_BUILD_GPU_TESTS) static void do_ipc_get_put_benchmark(alloc_t *allocs, size_t num_allocs, size_t repeats, umf_ipc_handle_t *ipc_handles) { @@ -630,7 +623,7 @@ UBENCH_EX(ipc, disjoint_pool_with_level_zero_provider) { 
err_destroy_context: utils_ze_destroy_context(context); } -#endif /* (defined UMF_POLL_DISJOINT_ENABLED && defined UMF_BUILD_LEVEL_ZERO_PROVIDER && defined UMF_BUILD_GPU_TESTS) */ +#endif /* (defined UMF_BUILD_LEVEL_ZERO_PROVIDER && defined UMF_BUILD_GPU_TESTS) */ // TODO add IPC benchmark for CUDA diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 986ad5641..6f6a30099 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -41,16 +41,14 @@ if(UMF_POOL_SCALABLE_ENABLED) endif() endif() -if(UMF_BUILD_GPU_EXAMPLES - AND UMF_BUILD_LIBUMF_POOL_DISJOINT - AND UMF_BUILD_LEVEL_ZERO_PROVIDER) +if(UMF_BUILD_GPU_EXAMPLES AND UMF_BUILD_LEVEL_ZERO_PROVIDER) set(EXAMPLE_NAME umf_example_level_zero_shared_memory) add_umf_executable( NAME ${EXAMPLE_NAME} SRCS level_zero_shared_memory/level_zero_shared_memory.c common/examples_level_zero_helpers.c - LIBS disjoint_pool ze_loader umf) + LIBS ze_loader umf) target_include_directories( ${EXAMPLE_NAME} @@ -74,12 +72,11 @@ if(UMF_BUILD_GPU_EXAMPLES endif() else() message(STATUS "GPU Level Zero shared memory example requires " - "UMF_BUILD_GPU_EXAMPLES, UMF_BUILD_LEVEL_ZERO_PROVIDER and " - "UMF_BUILD_LIBUMF_POOL_DISJOINT to be turned ON - skipping") + "UMF_BUILD_GPU_EXAMPLES and UMF_BUILD_LEVEL_ZERO_PROVIDER " + "to be turned ON - skipping") endif() if(UMF_BUILD_GPU_EXAMPLES - AND UMF_BUILD_LIBUMF_POOL_DISJOINT AND UMF_BUILD_CUDA_PROVIDER AND UMF_CUDA_ENABLED) set(EXAMPLE_NAME umf_example_cuda_shared_memory) @@ -87,7 +84,7 @@ if(UMF_BUILD_GPU_EXAMPLES add_umf_executable( NAME ${EXAMPLE_NAME} SRCS cuda_shared_memory/cuda_shared_memory.c - LIBS disjoint_pool cuda umf) + LIBS cuda umf) target_include_directories( ${EXAMPLE_NAME} @@ -113,14 +110,13 @@ if(UMF_BUILD_GPU_EXAMPLES else() message( STATUS - "GPU CUDA shared memory example requires UMF_BUILD_GPU_EXAMPLES, UMF_BUILD_CUDA_PROVIDER, UMF_BUILD_LIBUMF_POOL_DISJOINT to be turned ON and installed CUDA libraries - skipping" + "GPU CUDA shared memory example requires UMF_BUILD_GPU_EXAMPLES and UMF_BUILD_CUDA_PROVIDER to be turned ON and installed CUDA libraries - skipping" ) endif() # TODO: it looks like there is some problem with IPC implementation in Level # Zero on windows if(UMF_BUILD_GPU_EXAMPLES - AND UMF_BUILD_LIBUMF_POOL_DISJOINT AND UMF_BUILD_LEVEL_ZERO_PROVIDER AND LINUX) set(EXAMPLE_NAME umf_example_ipc_level_zero) @@ -129,7 +125,7 @@ if(UMF_BUILD_GPU_EXAMPLES NAME ${EXAMPLE_NAME} SRCS ipc_level_zero/ipc_level_zero.c common/examples_level_zero_helpers.c - LIBS disjoint_pool ze_loader umf) + LIBS ze_loader umf) target_include_directories( ${EXAMPLE_NAME} @@ -154,7 +150,7 @@ if(UMF_BUILD_GPU_EXAMPLES else() message( STATUS - "IPC Level 0 example requires UMF_BUILD_GPU_EXAMPLES, UMF_BUILD_LEVEL_ZERO_PROVIDER and UMF_BUILD_LIBUMF_POOL_DISJOINT to be turned ON - skipping" + "IPC Level 0 example requires UMF_BUILD_GPU_EXAMPLES and UMF_BUILD_LEVEL_ZERO_PROVIDER to be turned ON - skipping" ) endif() diff --git a/examples/README.md b/examples/README.md index e7823347e..70d114a63 100644 --- a/examples/README.md +++ b/examples/README.md @@ -24,7 +24,7 @@ cleans up and exits with an error status. 
### Requirements * Level Zero headers and libraries * compatible GPU with installed driver -* set UMF_BUILD_GPU_EXAMPLES, UMF_BUILD_LIBUMF_POOL_DISJOINT and UMF_BUILD_LEVEL_ZERO_PROVIDER CMake configuration flags to ON +* set UMF_BUILD_GPU_EXAMPLES and UMF_BUILD_LEVEL_ZERO_PROVIDER CMake configuration flags to ON ## IPC example with Level Zero memory provider This example demonstrates how to use UMF IPC API. The example creates two @@ -35,7 +35,7 @@ and build this example Level Zero development package should be installed. ### Requirements * Level Zero headers and libraries * compatible GPU with installed driver -* set UMF_BUILD_GPU_EXAMPLES, UMF_BUILD_LIBUMF_POOL_DISJOINT and UMF_BUILD_LEVEL_ZERO_PROVIDER CMake configuration flags to ON +* set UMF_BUILD_GPU_EXAMPLES and UMF_BUILD_LEVEL_ZERO_PROVIDER CMake configuration flags to ON ## IPC example with shared memory This example also demonstrates how to use UMF IPC API. The example creates two diff --git a/examples/cuda_shared_memory/CMakeLists.txt b/examples/cuda_shared_memory/CMakeLists.txt index dd8567c14..07daffc1c 100644 --- a/examples/cuda_shared_memory/CMakeLists.txt +++ b/examples/cuda_shared_memory/CMakeLists.txt @@ -55,9 +55,8 @@ target_link_directories( ${LIBHWLOC_LIBRARY_DIRS} ${CUDA_LIBRARY_DIRS}) target_link_options(${EXAMPLE_NAME} PRIVATE "-Wl,--start-group") -target_link_libraries( - ${EXAMPLE_NAME} PRIVATE stdc++ libdisjoint_pool.a ${CUDA_LIBRARIES} - ${LIBUMF_LIBRARIES}) +target_link_libraries(${EXAMPLE_NAME} PRIVATE stdc++ ${CUDA_LIBRARIES} + ${LIBUMF_LIBRARIES}) # an optional part - adds a test of this example add_test( diff --git a/examples/ipc_level_zero/CMakeLists.txt b/examples/ipc_level_zero/CMakeLists.txt index 5c17d4c9c..9d04d998c 100644 --- a/examples/ipc_level_zero/CMakeLists.txt +++ b/examples/ipc_level_zero/CMakeLists.txt @@ -53,8 +53,8 @@ target_include_directories(${EXAMPLE_NAME} PRIVATE ${LIBUMF_INCLUDE_DIRS} target_link_directories(${EXAMPLE_NAME} PRIVATE ${LIBUMF_LIBRARY_DIRS} ${LIBHWLOC_LIBRARY_DIRS}) target_link_options(${EXAMPLE_NAME} PRIVATE "-Wl,--start-group") -target_link_libraries(${EXAMPLE_NAME} PRIVATE stdc++ libdisjoint_pool.a - ze_loader ${LIBUMF_LIBRARIES}) +target_link_libraries(${EXAMPLE_NAME} PRIVATE stdc++ ze_loader + ${LIBUMF_LIBRARIES}) # an optional part - adds a test of this example add_test( diff --git a/examples/level_zero_shared_memory/CMakeLists.txt b/examples/level_zero_shared_memory/CMakeLists.txt index 3711b4094..5101b2fd8 100644 --- a/examples/level_zero_shared_memory/CMakeLists.txt +++ b/examples/level_zero_shared_memory/CMakeLists.txt @@ -53,8 +53,8 @@ target_include_directories(${EXAMPLE_NAME} PRIVATE ${LIBUMF_INCLUDE_DIRS} target_link_directories(${EXAMPLE_NAME} PRIVATE ${LIBUMF_LIBRARY_DIRS} ${LIBHWLOC_LIBRARY_DIRS}) target_link_options(${EXAMPLE_NAME} PRIVATE "-Wl,--start-group") -target_link_libraries(${EXAMPLE_NAME} PRIVATE stdc++ libdisjoint_pool.a - ze_loader ${LIBUMF_LIBRARIES}) +target_link_libraries(${EXAMPLE_NAME} PRIVATE stdc++ ze_loader + ${LIBUMF_LIBRARIES}) # an optional part - adds a test of this example add_test( diff --git a/scripts/qemu/run-build.sh b/scripts/qemu/run-build.sh index b0f4bee1e..34a3b7b5e 100755 --- a/scripts/qemu/run-build.sh +++ b/scripts/qemu/run-build.sh @@ -25,7 +25,6 @@ cmake .. 
\ -DUMF_BUILD_CUDA_PROVIDER=ON \ -DUMF_FORMAT_CODE_STYLE=OFF \ -DUMF_DEVELOPER_MODE=ON \ - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON \ -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON \ -DUMF_BUILD_EXAMPLES=ON \ -DUMF_USE_COVERAGE=${COVERAGE} \ diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index fb32b6d2e..5ffc301a7 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -67,8 +67,9 @@ set(UMF_SOURCES provider/provider_tracking.c critnib/critnib.c ravl/ravl.c - pool/pool_proxy.c + pool/pool_disjoint.c pool/pool_jemalloc.c + pool/pool_proxy.c pool/pool_scalable.c) if(UMF_POOL_JEMALLOC_ENABLED) diff --git a/src/libumf.def b/src/libumf.def index 0aa9f0a1f..20bf2616a 100644 --- a/src/libumf.def +++ b/src/libumf.def @@ -25,6 +25,18 @@ EXPORTS umfDevDaxMemoryProviderParamsDestroy umfDevDaxMemoryProviderParamsSetDeviceDax umfDevDaxMemoryProviderParamsSetProtection + umfDisjointPoolOps + umfDisjointPoolParamsCreate + umfDisjointPoolParamsDestroy + umfDisjointPoolParamsSetCapacity + umfDisjointPoolParamsSetMaxPoolableSize + umfDisjointPoolParamsSetMinBucketSize + umfDisjointPoolParamsSetName + umfDisjointPoolParamsSetSharedLimits + umfDisjointPoolParamsSetSlabMinSize + umfDisjointPoolParamsSetTrace + umfDisjointPoolSharedLimitsCreate + umfDisjointPoolSharedLimitsDestroy umfFree umfFileMemoryProviderOps umfFileMemoryProviderParamsCreate diff --git a/src/libumf.map b/src/libumf.map index 1635fe018..e63b8067c 100644 --- a/src/libumf.map +++ b/src/libumf.map @@ -19,6 +19,18 @@ UMF_1.0 { umfDevDaxMemoryProviderParamsDestroy; umfDevDaxMemoryProviderParamsSetDeviceDax; umfDevDaxMemoryProviderParamsSetProtection; + umfDisjointPoolOps; + umfDisjointPoolParamsCreate; + umfDisjointPoolParamsDestroy; + umfDisjointPoolParamsSetCapacity; + umfDisjointPoolParamsSetMaxPoolableSize; + umfDisjointPoolParamsSetMinBucketSize; + umfDisjointPoolParamsSetName; + umfDisjointPoolParamsSetSharedLimits; + umfDisjointPoolParamsSetSlabMinSize; + umfDisjointPoolParamsSetTrace; + umfDisjointPoolSharedLimitsCreate; + umfDisjointPoolSharedLimitsDestroy; umfFree; umfFileMemoryProviderOps; umfFileMemoryProviderParamsCreate; diff --git a/src/pool/CMakeLists.txt b/src/pool/CMakeLists.txt index 17be932a4..45039b168 100644 --- a/src/pool/CMakeLists.txt +++ b/src/pool/CMakeLists.txt @@ -8,33 +8,3 @@ if(UMF_BUILD_SHARED_LIBRARY) endif() set(POOL_COMPILE_DEFINITIONS ${UMF_COMMON_COMPILE_DEFINITIONS}) - -# libumf_pool_disjoint -if(UMF_BUILD_LIBUMF_POOL_DISJOINT) - add_umf_library( - NAME disjoint_pool - TYPE STATIC - SRCS pool_disjoint.cpp ${POOL_EXTRA_SRCS} - LIBS ${POOL_EXTRA_LIBS}) - - target_compile_definitions(disjoint_pool - PRIVATE ${POOL_COMPILE_DEFINITIONS}) - - if(WINDOWS) - target_compile_options(disjoint_pool PRIVATE /DWIN32_LEAN_AND_MEAN - /DNOMINMAX) - endif() - - add_library(${PROJECT_NAME}::disjoint_pool ALIAS disjoint_pool) - - add_dependencies(disjoint_pool umf) - - target_link_libraries(disjoint_pool PRIVATE umf) - - target_include_directories( - disjoint_pool - PUBLIC $ - $) - - install(TARGETS disjoint_pool EXPORT ${PROJECT_NAME}-targets) -endif() diff --git a/src/pool/pool_disjoint.c b/src/pool/pool_disjoint.c new file mode 100644 index 000000000..35c31128e --- /dev/null +++ b/src/pool/pool_disjoint.c @@ -0,0 +1,1191 @@ +/* + * Copyright (C) 2022-2024 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +*/ + +#include "pool_disjoint_internal.h" + +static __TLS umf_result_t TLS_last_allocation_error; + +// Allocations are a minimum of 4KB/64KB/2MB even when a smaller size is +// requested. The implementation distinguishes between allocations of size +// ChunkCutOff = (minimum-alloc-size / 2) and those that are larger. +// Allocation requests smaller than ChunkCutoff use chunks taken from a single +// coarse-grain allocation. Thus, for example, for a 64KB minimum allocation +// size, and 8-byte allocations, only 1 in ~8000 requests results in a new +// coarse-grain allocation. Freeing results only in a chunk of a larger +// allocation to be marked as available and no real return to the system. An +// allocation is returned to the system only when all chunks in the larger +// allocation are freed by the program. Allocations larger than ChunkCutOff use +// a separate coarse-grain allocation for each request. These are subject to +// "pooling". That is, when such an allocation is freed by the program it is +// retained in a pool. The pool is available for future allocations, which means +// there are fewer actual coarse-grain allocations/deallocations. + +// The largest size which is allocated via the allocator. +// Allocations with size > CutOff bypass the pool and +// go directly to the provider. +static size_t CutOff = (size_t)1 << 31; // 2GB + +// Temporary solution for disabling memory poisoning. This is needed because +// AddressSanitizer does not support memory poisoning for GPU allocations. +// More info: https://github.com/oneapi-src/unified-memory-framework/issues/634 +#ifndef POISON_MEMORY +#define POISON_MEMORY 0 +#endif + +static void annotate_memory_inaccessible(void *ptr, size_t size) { + (void)ptr; + (void)size; +#if (POISON_MEMORY != 0) + utils_annotate_memory_inaccessible(ptr, size); +#endif +} + +static void annotate_memory_undefined(void *ptr, size_t size) { + (void)ptr; + (void)size; +#if (POISON_MEMORY != 0) + utils_annotate_memory_undefined(ptr, size); +#endif +} + +slab_t *create_slab(bucket_t *bucket) { + assert(bucket); + + slab_t *slab = umf_ba_global_alloc(sizeof(slab_t)); + if (slab == NULL) { + LOG_ERR("allocation of new slab failed!"); + return NULL; + } + + slab->num_allocated = 0; + slab->first_free_chunk_idx = 0; + slab->bucket = bucket; + + slab->iter = + (slab_list_item_t *)umf_ba_global_alloc(sizeof(slab_list_item_t)); + if (slab->iter == NULL) { + LOG_ERR("allocation of new slab iter failed!"); + umf_ba_global_free(slab); + return NULL; + } + slab->iter->val = slab; + slab->iter->prev = slab->iter->next = NULL; + + slab->num_chunks = bucket_slab_min_size(bucket) / bucket->size; + slab->chunks = umf_ba_global_alloc(sizeof(bool) * slab->num_chunks); + if (slab->chunks == NULL) { + LOG_ERR("allocation of slab chunks failed!"); + umf_ba_global_free(slab->iter); + umf_ba_global_free(slab); + return NULL; + } + memset(slab->chunks, 0, sizeof(bool) * slab->num_chunks); + + // in case bucket size is not a multiple of slab_min_size, we would have + // some padding at the end of the slab + slab->slab_size = bucket_slab_alloc_size(bucket); + + // NOTE: originally slabs memory were allocated without alignment + // with this registering a slab is simpler and doesn't require multimap + umf_memory_provider_handle_t provider = bucket->pool->provider; + umf_result_t res = + umfMemoryProviderAlloc(provider, slab->slab_size, + bucket_slab_min_size(bucket), &slab->mem_ptr); + if (res == 
UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY) { + LOG_ERR("allocation of slab data failed!"); + destroy_slab(slab); + return NULL; + } + + // raw allocation is not available for user so mark it as inaccessible + annotate_memory_inaccessible(slab->mem_ptr, slab->slab_size); + + LOG_DEBUG("bucket: %p, slab_size: %zu\n", (void *)bucket, slab->slab_size); + return slab; +} + +void destroy_slab(slab_t *slab) { + LOG_DEBUG("bucket: %p, slab_size: %zu\n", (void *)slab->bucket, + slab->slab_size); + + umf_memory_provider_handle_t provider = slab->bucket->pool->provider; + umf_result_t res = + umfMemoryProviderFree(provider, slab->mem_ptr, slab->slab_size); + if (res != UMF_RESULT_SUCCESS) { + LOG_ERR("deallocation of slab data failed!"); + } + + umf_ba_global_free(slab->chunks); + umf_ba_global_free(slab->iter); + umf_ba_global_free(slab); +} + +size_t slab_find_first_available_chunk_idx(const slab_t *slab) { + // return the index of the first available chunk, SIZE_MAX otherwise + + // use the first free chunk index as a hint for the search + bool *chunk = slab->chunks + slab->first_free_chunk_idx; + while (chunk != slab->chunks + slab->num_chunks) { + // false means not used + if (*chunk == false) { + size_t idx = (chunk - slab->chunks) / sizeof(bool); + LOG_DEBUG("idx: %zu\n", idx); + return idx; + } + chunk++; + } + + LOG_DEBUG("idx: SIZE_MAX\n"); + return SIZE_MAX; +} + +void *slab_get_chunk(slab_t *slab) { + // free chunk must exist, otherwise we would have allocated another slab + const size_t chunk_idx = slab_find_first_available_chunk_idx(slab); + assert(chunk_idx != SIZE_MAX); + + void *free_chunk = + (uint8_t *)slab->mem_ptr + chunk_idx * slab->bucket->size; + + // mark chunk as used + slab->chunks[chunk_idx] = true; + slab->num_allocated += 1; + + // use the found index as the next hint + slab->first_free_chunk_idx = chunk_idx + 1; + + return free_chunk; +} + +void *slab_get(const slab_t *slab) { return slab->mem_ptr; } +void *slab_get_end(const slab_t *slab) { + return (uint8_t *)slab->mem_ptr + bucket_slab_min_size(slab->bucket); +} + +void slab_free_chunk(slab_t *slab, void *ptr) { + // This method should be called through bucket (since we might remove the + // slab as a result), therefore all locks are done on that level. + + // Make sure that we're in the right slab + assert(ptr >= slab_get(slab) && ptr < slab_get_end(slab)); + + // Even if the pointer p was previously aligned, it's still inside the + // corresponding chunk, so we get the correct index here. + size_t chunk_idx = + ((uint8_t *)ptr - (uint8_t *)slab->mem_ptr) / slab->bucket->size; + + // Make sure that the chunk was allocated + assert(slab->chunks[chunk_idx] && "double free detected"); + slab->chunks[chunk_idx] = false; + slab->num_allocated -= 1; + + if (chunk_idx < slab->first_free_chunk_idx) { + slab->first_free_chunk_idx = chunk_idx; + } + + LOG_DEBUG("chunk_idx: %zu, num_allocated: %zu, " + "first_free_chunk_idx: %zu\n", + chunk_idx, slab->num_allocated, slab->first_free_chunk_idx); +} + +bool slab_has_avail(const slab_t *slab) { + return slab->num_allocated != slab->num_chunks; +} + +void slab_reg(slab_t *slab) { + bucket_t *bucket = slab->bucket; + + // NOTE: changed vs original DisjointPool implementation - currently slab + // is already aligned to bucket_slab_min_size. 
Additionally the end addr + // points to the last byte of slab data + void *start_addr = (void *)ALIGN_DOWN((size_t)slab_get(slab), + bucket_slab_min_size(bucket)); + void *end_addr = (uint8_t *)(start_addr) + bucket_slab_min_size(bucket) - 1; + + LOG_DEBUG("slab: %p, start: %p, end %p\n", (void *)slab, start_addr, + end_addr); + + slab_reg_by_addr(start_addr, slab); + slab_reg_by_addr(end_addr, slab); +} + +void slab_unreg(slab_t *slab) { + bucket_t *bucket = slab->bucket; + + // NOTE: changed vs original DisjointPool implementation - currently slab + // is already aligned to bucket_slab_min_size. Additionally the end addr + // points to the last byte of slab data + void *start_addr = (void *)ALIGN_DOWN((size_t)slab_get(slab), + bucket_slab_min_size(bucket)); + void *end_addr = (uint8_t *)(start_addr) + bucket_slab_min_size(bucket) - 1; + + LOG_DEBUG("slab: %p, start: %p, end %p\n", (void *)slab, start_addr, + end_addr); + + slab_unreg_by_addr(start_addr, slab); + slab_unreg_by_addr(end_addr, slab); +} + +bucket_t * +create_bucket(size_t sz, disjoint_pool_t *pool, + umf_disjoint_pool_shared_limits_handle_t shared_limits) { + + bucket_t *bucket = (bucket_t *)umf_ba_global_alloc(sizeof(bucket_t)); + if (bucket == NULL) { + LOG_ERR("allocation of new bucket failed!"); + return NULL; + } + + bucket->size = sz; + bucket->pool = pool; + bucket->available_slabs = NULL; + bucket->available_slabs_num = 0; + bucket->unavailable_slabs = NULL; + bucket->chunked_slabs_in_pool = 0; + bucket->alloc_pool_count = 0; + bucket->free_count = 0; + bucket->curr_slabs_in_use = 0; + bucket->curr_slabs_in_pool = 0; + bucket->max_slabs_in_pool = 0; + bucket->alloc_count = 0; + bucket->max_slabs_in_use = 0; + bucket->shared_limits = shared_limits; + + utils_mutex_init(&bucket->bucket_lock); + return bucket; +} + +void destroy_bucket(bucket_t *bucket) { + // use an extra tmp to store the next iterator before destroying the slab + slab_list_item_t *it = NULL, *tmp = NULL; + LL_FOREACH_SAFE(bucket->available_slabs, it, tmp) { destroy_slab(it->val); } + LL_FOREACH_SAFE(bucket->unavailable_slabs, it, tmp) { + destroy_slab(it->val); + } + + utils_mutex_destroy_not_free(&bucket->bucket_lock); + umf_ba_global_free(bucket); +} + +void bucket_free_chunk(bucket_t *bucket, void *ptr, slab_t *slab, + bool *to_pool) { + utils_mutex_lock(&bucket->bucket_lock); + + slab_free_chunk(slab, ptr); + + // in case if the slab was previously full and now has single available + // chunk, it should be moved to the list of available slabs + if (slab->num_allocated == (slab->num_chunks - 1)) { + slab_list_item_t *slab_it = slab->iter; + assert(slab_it->val != NULL); + DL_DELETE(bucket->unavailable_slabs, slab_it); + DL_PREPEND(bucket->available_slabs, slab_it); + bucket->available_slabs_num++; + } + + // check if slab is empty, and pool it if we can + *to_pool = true; + if (slab->num_allocated == 0) { + // The slab is now empty. + // If pool has capacity then put the slab in the pool. + // The to_pool parameter indicates whether the slab will be put in the + // pool or freed. 
+ if (!bucket_can_pool(bucket, to_pool)) { + // remove slab + slab_list_item_t *slab_it = slab->iter; + assert(slab_it->val != NULL); + slab_unreg(slab_it->val); + DL_DELETE(bucket->available_slabs, slab_it); + bucket->available_slabs_num--; + destroy_slab(slab_it->val); + } + } + + utils_mutex_unlock(&bucket->bucket_lock); +} + +void bucket_count_alloc(bucket_t *bucket, bool from_pool) { + ++bucket->alloc_count; + if (from_pool) { + ++bucket->alloc_pool_count; + } +} + +void *bucket_get_chunk(bucket_t *bucket, bool *from_pool) { + utils_mutex_lock(&bucket->bucket_lock); + + slab_list_item_t *slab_it = bucket_get_avail_slab(bucket, from_pool); + if (slab_it == NULL) { + utils_mutex_unlock(&bucket->bucket_lock); + return NULL; + } + + void *free_chunk = slab_get_chunk(slab_it->val); + + // if the slab is full, move it to unavailable slabs and update its iterator + if (!(slab_has_avail(slab_it->val))) { + DL_DELETE(bucket->available_slabs, slab_it); + bucket->available_slabs_num++; + slab_it->prev = NULL; + DL_PREPEND(bucket->unavailable_slabs, slab_it); + } + + utils_mutex_unlock(&bucket->bucket_lock); + return free_chunk; +} + +size_t bucket_chunk_cut_off(bucket_t *bucket) { + return bucket_slab_min_size(bucket) / 2; +} + +size_t bucket_slab_alloc_size(bucket_t *bucket) { + return utils_max(bucket->size, bucket_slab_min_size(bucket)); +} + +size_t bucket_slab_min_size(bucket_t *bucket) { + return bucket->pool->params.slab_min_size; +} + +slab_list_item_t *bucket_get_avail_full_slab(bucket_t *bucket, + bool *from_pool) { + // return a slab that will be used for a single allocation + if (bucket->available_slabs == NULL) { + slab_t *slab = create_slab(bucket); + if (slab == NULL) { + LOG_ERR("create_slab failed!") + return NULL; + } + + slab_reg(slab); + DL_PREPEND(bucket->available_slabs, slab->iter); + bucket->available_slabs_num++; + *from_pool = false; + bucket_update_stats(bucket, 1, 0); + } else { + bucket_decrement_pool(bucket, from_pool); + } + + return bucket->available_slabs; +} + +void *bucket_get_slab(bucket_t *bucket, bool *from_pool) { + utils_mutex_lock(&bucket->bucket_lock); + + slab_list_item_t *slab_it = bucket_get_avail_full_slab(bucket, from_pool); + if (slab_it == NULL) { + utils_mutex_unlock(&bucket->bucket_lock); + return NULL; + } + + slab_t *slab = slab_it->val; + void *ptr = slab_get(slab); + + DL_DELETE(bucket->available_slabs, slab_it); + bucket->available_slabs_num--; + slab_it->prev = NULL; + DL_PREPEND(bucket->unavailable_slabs, slab_it); + + utils_mutex_unlock(&bucket->bucket_lock); + return ptr; +} + +void bucket_free_slab(bucket_t *bucket, slab_t *slab, bool *to_pool) { + utils_mutex_lock(&bucket->bucket_lock); + + slab_list_item_t *slab_it = slab->iter; + assert(slab_it->val != NULL); + if (bucket_can_pool(bucket, to_pool)) { + DL_DELETE(bucket->unavailable_slabs, slab_it); + slab_it->prev = NULL; + DL_PREPEND(bucket->available_slabs, slab_it); + bucket->available_slabs_num++; + } else { + slab_unreg(slab_it->val); + DL_DELETE(bucket->unavailable_slabs, slab_it); + destroy_slab(slab_it->val); + } + utils_mutex_unlock(&bucket->bucket_lock); +} + +slab_list_item_t *bucket_get_avail_slab(bucket_t *bucket, bool *from_pool) { + if (bucket->available_slabs == NULL) { + slab_t *slab = create_slab(bucket); + if (slab == NULL) { + LOG_ERR("create_slab failed!") + return NULL; + } + + slab_reg(slab); + DL_PREPEND(bucket->available_slabs, slab->iter); + bucket->available_slabs_num++; + bucket_update_stats(bucket, 1, 0); + *from_pool = false; + } else { + slab_t *slab = 
bucket->available_slabs->val; + if (slab->num_allocated == 0) { + // If this was an empty slab, it was in the pool. + // Now it is no longer in the pool, so update count. + --bucket->chunked_slabs_in_pool; + bucket_decrement_pool(bucket, from_pool); + } else { + // Allocation from existing slab is treated as from pool for statistics. + *from_pool = true; + } + } + + return bucket->available_slabs; +} + +size_t bucket_capacity(bucket_t *bucket) { + // For buckets used in chunked mode, just one slab in pool is sufficient. + // For larger buckets, the capacity could be more and is adjustable. + if (bucket->size <= bucket_chunk_cut_off(bucket)) { + return 1; + } else { + return bucket->pool->params.capacity; + } +} + +void bucket_update_stats(bucket_t *bucket, int in_use, int in_pool) { + if (bucket->pool->params.pool_trace == 0) { + return; + } + + bucket->curr_slabs_in_use += in_use; + bucket->max_slabs_in_use = + utils_max(bucket->curr_slabs_in_use, bucket->max_slabs_in_use); + + bucket->curr_slabs_in_pool += in_pool; + bucket->max_slabs_in_pool = + utils_max(bucket->curr_slabs_in_pool, bucket->max_slabs_in_pool); + + // Increment or decrement current pool sizes based on whether + // slab was added to or removed from pool. + bucket->pool->params.cur_pool_size += + in_pool * bucket_slab_alloc_size(bucket); +} + +void bucket_decrement_pool(bucket_t *bucket, bool *from_pool) { + // If a slab was available in the pool then note that the current pooled + // size has reduced by the size of a slab in this bucket. + *from_pool = true; + bucket_update_stats(bucket, 1, -1); + utils_fetch_and_add64(&bucket->shared_limits->total_size, + -(long long)bucket_slab_alloc_size(bucket)); +} + +bool bucket_can_pool(bucket_t *bucket, bool *to_pool) { + size_t new_free_slabs_in_bucket; + + // check if this bucket is used in chunked form or as full slabs + bool chunked_bucket = bucket->size <= bucket_chunk_cut_off(bucket); + if (chunked_bucket) { + new_free_slabs_in_bucket = bucket->chunked_slabs_in_pool + 1; + } else { + new_free_slabs_in_bucket = bucket->available_slabs_num; + } + + if (bucket_capacity(bucket) >= new_free_slabs_in_bucket) { + size_t pool_size = 0; + utils_atomic_load_acquire(&bucket->shared_limits->total_size, + &pool_size); + while (true) { + size_t new_pool_size = pool_size + bucket_slab_alloc_size(bucket); + + if (bucket->shared_limits->max_size < new_pool_size) { + break; + } + +// TODO!!! 
+#ifdef _WIN32 + if (bucket->shared_limits->total_size != new_pool_size) { + bucket->shared_limits->total_size = new_pool_size; +#else + if (utils_compare_exchange(&bucket->shared_limits->total_size, + &pool_size, &new_pool_size)) { +#endif + if (chunked_bucket) { + ++bucket->chunked_slabs_in_pool; + } + + bucket_update_stats(bucket, -1, 1); + *to_pool = true; + return true; + } + } + } + + bucket_update_stats(bucket, -1, 0); + *to_pool = false; + return false; +} + +utils_mutex_t *bucket_get_known_slabs_map_lock(bucket_t *bucket) { + return &bucket->pool->known_slabs_map_lock; +} + +void slab_reg_by_addr(void *addr, slab_t *slab) { + bucket_t *bucket = slab->bucket; + disjoint_pool_t *pool = bucket->pool; + utils_mutex_t *lock = &pool->known_slabs_map_lock; + critnib *slabs = pool->known_slabs; + + utils_mutex_lock(lock); + + // NODE: in the original Disjoint Pool implementation slabs was defined as + // a multimap + LOG_DEBUG("addr: %p, slab: %p\n", addr, (void *)slab); + int ret = critnib_insert(slabs, (uintptr_t)addr, slab, 0); + if (ret == ENOMEM) { + LOG_ERR("register failed because of out of memory!"); + } else if (ret == EEXIST) { + LOG_ERR("register failed because the address is already registered!"); + } + + utils_mutex_unlock(lock); +} + +void slab_unreg_by_addr(void *addr, slab_t *slab) { + bucket_t *bucket = slab->bucket; + disjoint_pool_t *pool = bucket->pool; + utils_mutex_t *lock = &pool->known_slabs_map_lock; + critnib *slabs = pool->known_slabs; + + utils_mutex_lock(lock); + +#ifndef NDEBUG + // debug only + // assume single-value per key + slab_t *known_slab = (slab_t *)critnib_get(slabs, (uintptr_t)addr); + assert(known_slab != NULL && "Slab is not found"); + assert(slab == known_slab); + (void)known_slab; +#endif + + LOG_DEBUG("addr: %p, slab: %p\n", addr, (void *)slab); + critnib_remove(slabs, (uintptr_t)addr); + + utils_mutex_unlock(lock); +} + +static size_t size_to_idx(disjoint_pool_t *pool, size_t size) { + assert(size <= CutOff && "Unexpected size"); + assert(size > 0 && "Unexpected size"); + + size_t min_bucket_size = (size_t)1 << pool->min_bucket_size_exp; + if (size < min_bucket_size) { + return 0; + } + + // get the position of the leftmost set bit + size_t position = getLeftmostSetBitPos(size); + + bool is_power_of_2 = 0 == (size & (size - 1)); + bool larger_than_halfway_between_powers_of_2 = + !is_power_of_2 && + (bool)((size - 1) & ((uint64_t)(1) << (position - 1))); + size_t index = (position - pool->min_bucket_size_exp) * 2 + + (int)(!is_power_of_2) + + (int)larger_than_halfway_between_powers_of_2; + + return index; +} + +umf_disjoint_pool_shared_limits_t * +disjoint_pool_get_limits(disjoint_pool_t *pool) { + if (pool->params.shared_limits) { + return pool->params.shared_limits; + } else { + return pool->default_shared_limits; + } +} + +bucket_t *disjoint_pool_find_bucket(disjoint_pool_t *pool, size_t size) { + size_t calculated_idx = size_to_idx(pool, size); + +#ifndef NDEBUG + // debug check + bucket_t *bucket = pool->buckets[calculated_idx]; + assert(bucket->size >= size); + (void)bucket; + + if (calculated_idx > 0) { + bucket_t *bucket_prev = pool->buckets[calculated_idx - 1]; + assert(bucket_prev->size < size); + (void)bucket_prev; + } +#endif // NDEBUG + + return pool->buckets[calculated_idx]; +} + +void bucket_print_stats(bucket_t *bucket, bool *title_printed, + const char *label) { + if (bucket->alloc_count) { + if (!*title_printed) { + LOG_DEBUG("%s memory statistics", label); + LOG_DEBUG("%14s %12s %12s %18s %20s %21s", "Bucket Size", "Allocs", 
+ "Frees", "Allocs from Pool", "Peak Slabs in Use", + "Peak Slabs in Pool"); + *title_printed = true; + } + LOG_DEBUG("%14zu %12zu %12zu %18zu %20zu %21zu", bucket->size, + bucket->alloc_count, bucket->free_count, + bucket->alloc_pool_count, bucket->max_slabs_in_use, + bucket->max_slabs_in_pool); + } +} + +void disjoint_pool_print_stats(disjoint_pool_t *pool, bool *title_printed, + size_t *high_bucket_size, + size_t *high_peak_slabs_in_use, + const char *mt_name) { + *high_bucket_size = 0; + *high_peak_slabs_in_use = 0; + for (size_t i = 0; i < pool->buckets_num; i++) { + bucket_t *bucket = pool->buckets[i]; + bucket_print_stats(bucket, title_printed, mt_name); + *high_peak_slabs_in_use = + utils_max(bucket->max_slabs_in_use, *high_peak_slabs_in_use); + if (bucket->alloc_count) { + *high_bucket_size = + utils_max(bucket_slab_alloc_size(bucket), *high_bucket_size); + } + } +} + +void *disjoint_pool_allocate(disjoint_pool_t *pool, size_t size, + bool *from_pool) { + if (size == 0) { + return NULL; + } + + void *ptr = NULL; + + if (size > pool->params.max_poolable_size) { + umf_result_t ret = + umfMemoryProviderAlloc(pool->provider, size, 0, &ptr); + if (ret != UMF_RESULT_SUCCESS) { + TLS_last_allocation_error = ret; + return NULL; + } + + annotate_memory_undefined(ptr, size); + return ptr; + } + + *from_pool = false; + bucket_t *bucket = disjoint_pool_find_bucket(pool, size); + + if (size > bucket_chunk_cut_off(bucket)) { + ptr = bucket_get_slab(bucket, from_pool); + } else { + ptr = bucket_get_chunk(bucket, from_pool); + } + + if (ptr == NULL) { + TLS_last_allocation_error = UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + return NULL; + } + + if (pool->params.pool_trace > 1) { + bucket_count_alloc(bucket, from_pool); + } + + VALGRIND_DO_MEMPOOL_ALLOC(pool, ptr, size); + annotate_memory_undefined(ptr, bucket->size); + + return ptr; +} + +umf_result_t disjoint_pool_initialize(umf_memory_provider_handle_t provider, + void *params, void **ppPool) { + if (!provider) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + disjoint_pool_t *disjoint_pool = + (disjoint_pool_t *)umf_ba_global_alloc(sizeof(struct disjoint_pool_t)); + if (!disjoint_pool) { + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + umf_disjoint_pool_params_t *dp_params = + (umf_disjoint_pool_params_t *)params; + + // min_bucket_size parameter must be a power of 2 for bucket sizes + // to generate correctly. + if (!dp_params->min_bucket_size || + !((dp_params->min_bucket_size & (dp_params->min_bucket_size - 1)) == + 0)) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + VALGRIND_DO_CREATE_MEMPOOL(disjoint_pool, 0, 0); + + disjoint_pool->provider = provider; + disjoint_pool->params = *dp_params; + + utils_mutex_init(&disjoint_pool->known_slabs_map_lock); + disjoint_pool->known_slabs = critnib_new(); + + // Generate buckets sized such as: 64, 96, 128, 192, ..., CutOff. + // Powers of 2 and the value halfway between the powers of 2. + size_t Size1 = disjoint_pool->params.min_bucket_size; + + // min_bucket_size cannot be larger than CutOff. + Size1 = utils_min(Size1, CutOff); + + // Buckets sized smaller than the bucket default size- 8 aren't needed. + Size1 = utils_max(Size1, UMF_DISJOINT_POOL_MIN_BUCKET_DEFAULT_SIZE); + + // Calculate the exponent for min_bucket_size used for finding buckets. 
+ disjoint_pool->min_bucket_size_exp = (size_t)log2Utils(Size1); + disjoint_pool->default_shared_limits = + umfDisjointPoolSharedLimitsCreate(SIZE_MAX); + + // count number of buckets, start from 1 + disjoint_pool->buckets_num = 1; + size_t Size2 = Size1 + Size1 / 2; + size_t ts2 = Size2, ts1 = Size1; + for (; Size2 < CutOff; Size1 *= 2, Size2 *= 2) { + disjoint_pool->buckets_num += 2; + } + disjoint_pool->buckets = (bucket_t **)umf_ba_global_alloc( + sizeof(bucket_t *) * disjoint_pool->buckets_num); + + int i = 0; + Size1 = ts1; + Size2 = ts2; + for (; Size2 < CutOff; Size1 *= 2, Size2 *= 2, i += 2) { + disjoint_pool->buckets[i] = create_bucket( + Size1, disjoint_pool, disjoint_pool_get_limits(disjoint_pool)); + disjoint_pool->buckets[i + 1] = create_bucket( + Size2, disjoint_pool, disjoint_pool_get_limits(disjoint_pool)); + } + disjoint_pool->buckets[i] = create_bucket( + CutOff, disjoint_pool, disjoint_pool_get_limits(disjoint_pool)); + + umf_result_t ret = umfMemoryProviderGetMinPageSize( + provider, NULL, &disjoint_pool->provider_min_page_size); + if (ret != UMF_RESULT_SUCCESS) { + disjoint_pool->provider_min_page_size = 0; + } + + *ppPool = (void *)disjoint_pool; + + return UMF_RESULT_SUCCESS; +} + +void *disjoint_pool_malloc(void *pool, size_t size) { + // For full-slab allocations indicates whether slab is from Pool. + disjoint_pool_t *hPool = (disjoint_pool_t *)pool; + + bool from_pool; + void *ptr = disjoint_pool_allocate(hPool, size, &from_pool); + + if (hPool->params.pool_trace > 2) { + LOG_DEBUG("Allocated %8zu %s bytes from %s -> %p", size, + hPool->params.name, (from_pool ? "pool" : "provider"), ptr); + } + + return ptr; +} + +void *disjoint_pool_calloc(void *pool, size_t num, size_t size) { + (void)pool; + (void)num; + (void)size; + + // Not supported + TLS_last_allocation_error = UMF_RESULT_ERROR_NOT_SUPPORTED; + return NULL; +} + +void *disjoint_pool_realloc(void *pool, void *ptr, size_t size) { + (void)pool; + (void)ptr; + (void)size; + + // Not supported + TLS_last_allocation_error = UMF_RESULT_ERROR_NOT_SUPPORTED; + return NULL; +} + +void *disjoint_pool_aligned_malloc(void *pool, size_t size, size_t alignment) { + disjoint_pool_t *disjoint_pool = (disjoint_pool_t *)pool; + + bool from_pool; + void *ptr = NULL; + + if (size == 0) { + return NULL; + } + + if (alignment <= 1) { + return disjoint_pool_allocate(pool, size, &from_pool); + } + + size_t aligned_size; + if (alignment <= disjoint_pool->provider_min_page_size) { + // This allocation will be served from a Bucket which size is multiple + // of Alignment and Slab address is aligned to provider_min_page_size + // so the address will be properly aligned. + aligned_size = (size > 1) ? ALIGN_UP(size, alignment) : alignment; + } else { + // Slabs are only aligned to provider_min_page_size, we need to compensate + // for that in case the allocation is within pooling limit. + // TODO: consider creating properly-aligned Slabs on demand + aligned_size = size + alignment - 1; + } + + // Check if requested allocation size is within pooling limit. + // If not, just request aligned pointer from the system. 
+ from_pool = false; + if (aligned_size > disjoint_pool->params.max_poolable_size) { + + umf_result_t ret = umfMemoryProviderAlloc(disjoint_pool->provider, size, + alignment, &ptr); + if (ret != UMF_RESULT_SUCCESS) { + TLS_last_allocation_error = ret; + return NULL; + } + + assert(ptr); + annotate_memory_undefined(ptr, size); + return ptr; + } + + bucket_t *bucket = disjoint_pool_find_bucket(pool, aligned_size); + + if (aligned_size > bucket_chunk_cut_off(bucket)) { + ptr = bucket_get_slab(bucket, &from_pool); + } else { + ptr = bucket_get_chunk(bucket, &from_pool); + } + + assert(ptr); + if (disjoint_pool->params.pool_trace > 1) { + bucket_count_alloc(bucket, from_pool); + } + + if (disjoint_pool->params.pool_trace > 2) { + LOG_DEBUG("Allocated %8zu %s bytes aligned at %zu from %s -> %p", size, + disjoint_pool->params.name, alignment, + (from_pool ? "pool" : "provider"), ptr); + } + + VALGRIND_DO_MEMPOOL_ALLOC(disjoint_pool, ALIGN_UP((size_t)ptr, alignment), + size); + annotate_memory_undefined((void *)ALIGN_UP((size_t)ptr, alignment), size); + return (void *)ALIGN_UP((size_t)ptr, alignment); +} + +size_t disjoint_pool_malloc_usable_size(void *pool, void *ptr) { + (void)pool; + (void)ptr; + + // Not supported + return 0; +} + +umf_result_t disjoint_pool_free(void *pool, void *ptr) { + disjoint_pool_t *disjoint_pool = (disjoint_pool_t *)pool; + if (ptr == NULL) { + return UMF_RESULT_SUCCESS; + } + + void *slab_ptr = + (void *)ALIGN_DOWN((size_t)ptr, disjoint_pool->params.slab_min_size); + + // Lock the map on read + utils_mutex_lock(&disjoint_pool->known_slabs_map_lock); + + slab_t *slab = + (slab_t *)critnib_get(disjoint_pool->known_slabs, (uintptr_t)slab_ptr); + + // check if given pointer is allocated inside any Disjoint Pool slab + if (slab == NULL) { + utils_mutex_unlock(&disjoint_pool->known_slabs_map_lock); + + // regular free + umf_alloc_info_t allocInfo = {NULL, 0, NULL}; + umf_result_t ret = umfMemoryTrackerGetAllocInfo(ptr, &allocInfo); + if (ret != UMF_RESULT_SUCCESS) { + TLS_last_allocation_error = ret; + return ret; + } + + size_t size = allocInfo.baseSize; + umf_memory_provider_handle_t provider = disjoint_pool->provider; + ret = umfMemoryProviderFree(provider, ptr, size); + if (ret != UMF_RESULT_SUCCESS) { + TLS_last_allocation_error = ret; + } + + return ret; + } + + bool to_pool = false; + + if (ptr < slab_get(slab) || ptr >= slab_get_end(slab)) { + // There is a rare case when we have a pointer from system allocation + // next to some slab with an entry in the map. So we find a slab but + // the range checks fail. + assert(0); + return UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC; + } + + // The slab object won't be deleted until it's removed from the map which is + // protected by the lock, so it's safe to access it here. + + // Unlock the map before freeing the chunk, it may be locked on write + // there + utils_mutex_unlock(&disjoint_pool->known_slabs_map_lock); + bucket_t *bucket = slab->bucket; + + if (disjoint_pool->params.pool_trace > 1) { + bucket->free_count++; + } + + VALGRIND_DO_MEMPOOL_FREE(pool, ptr); + annotate_memory_inaccessible(ptr, bucket->size); + if (bucket->size <= bucket_chunk_cut_off(bucket)) { + bucket_free_chunk(bucket, ptr, slab, &to_pool); + } else { + bucket_free_slab(bucket, slab, &to_pool); + } + + if (disjoint_pool->params.pool_trace > 2) { + const char *name = disjoint_pool->params.name; + LOG_DEBUG("freed %s %p to %s, current total pool size: %zu, current " + "pool size for %s: %zu", + name, ptr, (to_pool ? 
"pool" : "provider"), + disjoint_pool_get_limits(disjoint_pool)->total_size, name, + disjoint_pool->params.cur_pool_size); + } + + return UMF_RESULT_SUCCESS; +} + +umf_result_t disjoint_pool_get_last_allocation_error(void *pool) { + (void)pool; + + return TLS_last_allocation_error; +} + +// Define destructor for use with unique_ptr +void disjoint_pool_finalize(void *pool) { + + disjoint_pool_t *hPool = (disjoint_pool_t *)pool; + + if (hPool->params.pool_trace > 1) { + bool title_printed = false; + size_t high_bucket_size; + size_t high_peak_slabs_in_use; + const char *name = hPool->params.name; + + disjoint_pool_print_stats(hPool, &title_printed, &high_bucket_size, + &high_peak_slabs_in_use, name); + if (title_printed) { + LOG_DEBUG("current pool size: %zu", + disjoint_pool_get_limits(hPool)->total_size); + LOG_DEBUG("suggested setting=;%c%s:%zu,%zu,64K", + (char)tolower(name[0]), (name + 1), high_bucket_size, + high_peak_slabs_in_use); + } + } + + for (size_t i = 0; i < hPool->buckets_num; i++) { + destroy_bucket(hPool->buckets[i]); + } + + VALGRIND_DO_DESTROY_MEMPOOL(hPool); + + umfDisjointPoolSharedLimitsDestroy(hPool->default_shared_limits); + critnib_delete(hPool->known_slabs); + + utils_mutex_destroy_not_free(&hPool->known_slabs_map_lock); + + umf_ba_global_free(hPool); +} + +static umf_memory_pool_ops_t UMF_DISJOINT_POOL_OPS = { + .version = UMF_VERSION_CURRENT, + .initialize = disjoint_pool_initialize, + .finalize = disjoint_pool_finalize, + .malloc = disjoint_pool_malloc, + .calloc = disjoint_pool_calloc, + .realloc = disjoint_pool_realloc, + .aligned_malloc = disjoint_pool_aligned_malloc, + .malloc_usable_size = disjoint_pool_malloc_usable_size, + .free = disjoint_pool_free, + .get_last_allocation_error = disjoint_pool_get_last_allocation_error, +}; + +umf_memory_pool_ops_t *umfDisjointPoolOps(void) { + return &UMF_DISJOINT_POOL_OPS; +} + +umf_disjoint_pool_shared_limits_t * +umfDisjointPoolSharedLimitsCreate(size_t max_size) { + umf_disjoint_pool_shared_limits_t *ptr = + umf_ba_global_alloc(sizeof(umf_disjoint_pool_shared_limits_t)); + ptr->max_size = max_size; + ptr->total_size = 0; + return ptr; +} + +void umfDisjointPoolSharedLimitsDestroy( + umf_disjoint_pool_shared_limits_t *limits) { + umf_ba_global_free(limits); +} + +umf_result_t +umfDisjointPoolParamsCreate(umf_disjoint_pool_params_handle_t *hParams) { + static const char *DEFAULT_NAME = "disjoint_pool"; + + if (!hParams) { + LOG_ERR("disjoint pool params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + umf_disjoint_pool_params_handle_t params = + umf_ba_global_alloc(sizeof(umf_disjoint_pool_params_t)); + if (params == NULL) { + LOG_ERR("cannot allocate memory for disjoint pool params"); + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + params->slab_min_size = 0; + params->max_poolable_size = 0; + params->capacity = 0; + params->min_bucket_size = UMF_DISJOINT_POOL_MIN_BUCKET_DEFAULT_SIZE; + params->cur_pool_size = 0; + params->pool_trace = 0; + params->shared_limits = NULL; + params->name = NULL; + + umf_result_t ret = umfDisjointPoolParamsSetName(params, DEFAULT_NAME); + if (ret != UMF_RESULT_SUCCESS) { + umf_ba_global_free(params); + return ret; + } + + *hParams = params; + + return UMF_RESULT_SUCCESS; +} + +umf_result_t +umfDisjointPoolParamsDestroy(umf_disjoint_pool_params_handle_t hParams) { + // NOTE: dereferencing hParams when BA is already destroyed leads to crash + if (hParams && !umf_ba_global_is_destroyed()) { + umf_ba_global_free(hParams->name); + umf_ba_global_free(hParams); + } + + 
return UMF_RESULT_SUCCESS; +} + +umf_result_t +umfDisjointPoolParamsSetSlabMinSize(umf_disjoint_pool_params_handle_t hParams, + size_t slabMinSize) { + if (!hParams) { + LOG_ERR("disjoint pool params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + hParams->slab_min_size = slabMinSize; + return UMF_RESULT_SUCCESS; +} + +umf_result_t umfDisjointPoolParamsSetMaxPoolableSize( + umf_disjoint_pool_params_handle_t hParams, size_t maxPoolableSize) { + if (!hParams) { + LOG_ERR("disjoint pool params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + hParams->max_poolable_size = maxPoolableSize; + return UMF_RESULT_SUCCESS; +} + +umf_result_t +umfDisjointPoolParamsSetCapacity(umf_disjoint_pool_params_handle_t hParams, + size_t maxCapacity) { + if (!hParams) { + LOG_ERR("disjoint pool params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + hParams->capacity = maxCapacity; + return UMF_RESULT_SUCCESS; +} + +umf_result_t +umfDisjointPoolParamsSetMinBucketSize(umf_disjoint_pool_params_handle_t hParams, + size_t minBucketSize) { + if (!hParams) { + LOG_ERR("disjoint pool params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + // minBucketSize parameter must be a power of 2 and greater than 0. + if (minBucketSize == 0 || (minBucketSize & (minBucketSize - 1))) { + LOG_ERR("minBucketSize must be a power of 2 and greater than 0"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + hParams->min_bucket_size = minBucketSize; + return UMF_RESULT_SUCCESS; +} + +umf_result_t +umfDisjointPoolParamsSetTrace(umf_disjoint_pool_params_handle_t hParams, + int poolTrace) { + if (!hParams) { + LOG_ERR("disjoint pool params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + hParams->pool_trace = poolTrace; + return UMF_RESULT_SUCCESS; +} + +umf_result_t umfDisjointPoolParamsSetSharedLimits( + umf_disjoint_pool_params_handle_t hParams, + umf_disjoint_pool_shared_limits_handle_t hSharedLimits) { + if (!hParams) { + LOG_ERR("disjoint pool params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + hParams->shared_limits = hSharedLimits; + return UMF_RESULT_SUCCESS; +} + +umf_result_t +umfDisjointPoolParamsSetName(umf_disjoint_pool_params_handle_t hParams, + const char *name) { + if (!hParams) { + LOG_ERR("disjoint pool params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + char *newName = umf_ba_global_alloc(sizeof(char) * (strlen(name) + 1)); + if (newName == NULL) { + LOG_ERR("cannot allocate memory for disjoint pool name"); + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + umf_ba_global_free(hParams->name); + hParams->name = newName; + strcpy(hParams->name, name); + + return UMF_RESULT_SUCCESS; +} diff --git a/src/pool/pool_disjoint.cpp b/src/pool/pool_disjoint.cpp deleted file mode 100644 index e0298b43d..000000000 --- a/src/pool/pool_disjoint.cpp +++ /dev/null @@ -1,1313 +0,0 @@ -// Copyright (C) 2023 Intel Corporation -// Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -// TODO: replace with logger? 
-#include - -#include "provider/provider_tracking.h" - -#include "../cpp_helpers.hpp" -#include "pool_disjoint.h" -#include "umf.h" -#include "utils_log.h" -#include "utils_math.h" -#include "utils_sanitizers.h" - -// Temporary solution for disabling memory poisoning. This is needed because -// AddressSanitizer does not support memory poisoning for GPU allocations. -// More info: https://github.com/oneapi-src/unified-memory-framework/issues/634 -#ifndef POISON_MEMORY -#define POISON_MEMORY 0 -#endif - -static inline void annotate_memory_inaccessible([[maybe_unused]] void *ptr, - [[maybe_unused]] size_t size) { -#if (POISON_MEMORY != 0) - utils_annotate_memory_inaccessible(ptr, size); -#endif -} - -static inline void annotate_memory_undefined([[maybe_unused]] void *ptr, - [[maybe_unused]] size_t size) { -#if (POISON_MEMORY != 0) - utils_annotate_memory_undefined(ptr, size); -#endif -} - -typedef struct umf_disjoint_pool_shared_limits_t { - size_t MaxSize; - std::atomic TotalSize; -} umf_disjoint_pool_shared_limits_t; - -// Configuration of Disjoint Pool -typedef struct umf_disjoint_pool_params_t { - // Minimum allocation size that will be requested from the memory provider. - size_t SlabMinSize; - - // Allocations up to this limit will be subject to chunking/pooling - size_t MaxPoolableSize; - - // When pooling, each bucket will hold a max of 'Capacity' unfreed slabs - size_t Capacity; - - // Holds the minimum bucket size valid for allocation of a memory type. - // This value must be a power of 2. - size_t MinBucketSize; - - // Holds size of the pool managed by the allocator. - size_t CurPoolSize; - - // Whether to print pool usage statistics - int PoolTrace; - - // Memory limits that can be shared between multitple pool instances, - // i.e. if multiple pools use the same SharedLimits sum of those pools' - // sizes cannot exceed MaxSize. 
- umf_disjoint_pool_shared_limits_handle_t SharedLimits; - - // Name used in traces - char *Name; -} umf_disjoint_pool_params_t; - -class DisjointPool { - public: - class AllocImpl; - using Config = umf_disjoint_pool_params_t; - - umf_result_t initialize(umf_memory_provider_handle_t provider, - umf_disjoint_pool_params_handle_t parameters); - void *malloc(size_t size); - void *calloc(size_t, size_t); - void *realloc(void *, size_t); - void *aligned_malloc(size_t size, size_t alignment); - size_t malloc_usable_size(void *); - umf_result_t free(void *ptr); - umf_result_t get_last_allocation_error(); - - DisjointPool(); - ~DisjointPool(); - - private: - std::unique_ptr impl; -}; - -umf_disjoint_pool_shared_limits_t * -umfDisjointPoolSharedLimitsCreate(size_t MaxSize) { - return new umf_disjoint_pool_shared_limits_t{MaxSize, 0}; -} - -void umfDisjointPoolSharedLimitsDestroy( - umf_disjoint_pool_shared_limits_handle_t hSharedLimits) { - delete hSharedLimits; -} - -umf_result_t -umfDisjointPoolParamsCreate(umf_disjoint_pool_params_handle_t *hParams) { - static const char *DEFAULT_NAME = "disjoint_pool"; - - if (!hParams) { - LOG_ERR("disjoint pool params handle is NULL"); - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - umf_disjoint_pool_params_handle_t params = new umf_disjoint_pool_params_t{}; - if (params == nullptr) { - LOG_ERR("cannot allocate memory for disjoint pool params"); - return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - params->SlabMinSize = 0; - params->MaxPoolableSize = 0; - params->Capacity = 0; - params->MinBucketSize = UMF_DISJOINT_POOL_MIN_BUCKET_DEFAULT_SIZE; - params->CurPoolSize = 0; - params->PoolTrace = 0; - params->SharedLimits = nullptr; - params->Name = nullptr; - - umf_result_t ret = umfDisjointPoolParamsSetName(params, DEFAULT_NAME); - if (ret != UMF_RESULT_SUCCESS) { - delete params; - return ret; - } - - *hParams = params; - - return UMF_RESULT_SUCCESS; -} - -umf_result_t -umfDisjointPoolParamsDestroy(umf_disjoint_pool_params_handle_t hParams) { - if (hParams) { - delete[] hParams->Name; - delete hParams; - } - - return UMF_RESULT_SUCCESS; -} - -umf_result_t -umfDisjointPoolParamsSetSlabMinSize(umf_disjoint_pool_params_handle_t hParams, - size_t slabMinSize) { - if (!hParams) { - LOG_ERR("disjoint pool params handle is NULL"); - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - hParams->SlabMinSize = slabMinSize; - return UMF_RESULT_SUCCESS; -} - -umf_result_t umfDisjointPoolParamsSetMaxPoolableSize( - umf_disjoint_pool_params_handle_t hParams, size_t maxPoolableSize) { - if (!hParams) { - LOG_ERR("disjoint pool params handle is NULL"); - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - hParams->MaxPoolableSize = maxPoolableSize; - return UMF_RESULT_SUCCESS; -} - -umf_result_t -umfDisjointPoolParamsSetCapacity(umf_disjoint_pool_params_handle_t hParams, - size_t maxCapacity) { - if (!hParams) { - LOG_ERR("disjoint pool params handle is NULL"); - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - hParams->Capacity = maxCapacity; - return UMF_RESULT_SUCCESS; -} - -umf_result_t -umfDisjointPoolParamsSetMinBucketSize(umf_disjoint_pool_params_handle_t hParams, - size_t minBucketSize) { - if (!hParams) { - LOG_ERR("disjoint pool params handle is NULL"); - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - // minBucketSize parameter must be a power of 2 and greater than 0. 
- if (minBucketSize == 0 || (minBucketSize & (minBucketSize - 1))) { - LOG_ERR("minBucketSize must be a power of 2 and greater than 0"); - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - hParams->MinBucketSize = minBucketSize; - return UMF_RESULT_SUCCESS; -} - -umf_result_t -umfDisjointPoolParamsSetTrace(umf_disjoint_pool_params_handle_t hParams, - int poolTrace) { - if (!hParams) { - LOG_ERR("disjoint pool params handle is NULL"); - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - hParams->PoolTrace = poolTrace; - return UMF_RESULT_SUCCESS; -} - -umf_result_t umfDisjointPoolParamsSetSharedLimits( - umf_disjoint_pool_params_handle_t hParams, - umf_disjoint_pool_shared_limits_handle_t hSharedLimits) { - if (!hParams) { - LOG_ERR("disjoint pool params handle is NULL"); - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - hParams->SharedLimits = hSharedLimits; - return UMF_RESULT_SUCCESS; -} - -umf_result_t -umfDisjointPoolParamsSetName(umf_disjoint_pool_params_handle_t hParams, - const char *name) { - if (!hParams) { - LOG_ERR("disjoint pool params handle is NULL"); - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - char *newName = new char[std::strlen(name) + 1]; - if (newName == nullptr) { - LOG_ERR("cannot allocate memory for disjoint pool name"); - return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - delete[] hParams->Name; - hParams->Name = newName; - std::strcpy(hParams->Name, name); - - return UMF_RESULT_SUCCESS; -} - -// Allocations are a minimum of 4KB/64KB/2MB even when a smaller size is -// requested. The implementation distinguishes between allocations of size -// ChunkCutOff = (minimum-alloc-size / 2) and those that are larger. -// Allocation requests smaller than ChunkCutoff use chunks taken from a single -// coarse-grain allocation. Thus, for example, for a 64KB minimum allocation -// size, and 8-byte allocations, only 1 in ~8000 requests results in a new -// coarse-grain allocation. Freeing results only in a chunk of a larger -// allocation to be marked as available and no real return to the system. An -// allocation is returned to the system only when all chunks in the larger -// allocation are freed by the program. Allocations larger than ChunkCutOff use -// a separate coarse-grain allocation for each request. These are subject to -// "pooling". That is, when such an allocation is freed by the program it is -// retained in a pool. The pool is available for future allocations, which means -// there are fewer actual coarse-grain allocations/deallocations. - -// The largest size which is allocated via the allocator. -// Allocations with size > CutOff bypass the pool and -// go directly to the provider. -static constexpr size_t CutOff = (size_t)1 << 31; // 2GB - -// Aligns the pointer down to the specified alignment -// (e.g. returns 8 for Size = 13, Alignment = 8) -static void *AlignPtrDown(void *Ptr, const size_t Alignment) { - return reinterpret_cast((reinterpret_cast(Ptr)) & - (~(Alignment - 1))); -} - -// Aligns the pointer up to the specified alignment -// (e.g. returns 16 for Size = 13, Alignment = 8) -static void *AlignPtrUp(void *Ptr, const size_t Alignment) { - void *AlignedPtr = AlignPtrDown(Ptr, Alignment); - // Special case when the pointer is already aligned - if (Ptr == AlignedPtr) { - return Ptr; - } - return static_cast(AlignedPtr) + Alignment; -} - -// Aligns the value up to the specified alignment -// (e.g. 
returns 16 for Size = 13, Alignment = 8) -static size_t AlignUp(size_t Val, size_t Alignment) { - assert(Alignment > 0); - return (Val + Alignment - 1) & (~(Alignment - 1)); -} - -typedef struct MemoryProviderError { - umf_result_t code; -} MemoryProviderError_t; - -class Bucket; - -// Represents the allocated memory block of size 'SlabMinSize' -// Internally, it splits the memory block into chunks. The number of -// chunks depends of the size of a Bucket which created the Slab. -// Note: Bucket's methods are responsible for thread safety of Slab access, -// so no locking happens here. -class Slab { - - // Pointer to the allocated memory of SlabMinSize bytes - void *MemPtr; - - // Represents the current state of each chunk: - // if the bit is set then the chunk is allocated - // the chunk is free for allocation otherwise - std::vector Chunks; - - // Total number of allocated chunks at the moment. - size_t NumAllocated = 0; - - // The bucket which the slab belongs to - Bucket &bucket; - - using ListIter = std::list>::iterator; - - // Store iterator to the corresponding node in avail/unavail list - // to achieve O(1) removal - ListIter SlabListIter; - - // Hints where to start search for free chunk in a slab - size_t FirstFreeChunkIdx = 0; - - // Return the index of the first available chunk, SIZE_MAX otherwise - size_t FindFirstAvailableChunkIdx() const; - - // Register/Unregister the slab in the global slab address map. - void regSlab(Slab &); - void unregSlab(Slab &); - static void regSlabByAddr(void *, Slab &); - static void unregSlabByAddr(void *, Slab &); - - public: - Slab(Bucket &); - ~Slab(); - - void setIterator(ListIter It) { SlabListIter = It; } - ListIter getIterator() const { return SlabListIter; } - - size_t getNumAllocated() const { return NumAllocated; } - - // Get pointer to allocation that is one piece of this slab. - void *getChunk(); - - // Get pointer to allocation that is this entire slab. - void *getSlab(); - - void *getPtr() const { return MemPtr; } - void *getEnd() const; - - size_t getChunkSize() const; - size_t getNumChunks() const { return Chunks.size(); } - - bool hasAvail(); - - Bucket &getBucket(); - const Bucket &getBucket() const; - - void freeChunk(void *Ptr); -}; - -class Bucket { - const size_t Size; - - // List of slabs which have at least 1 available chunk. - std::list> AvailableSlabs; - - // List of slabs with 0 available chunk. - std::list> UnavailableSlabs; - - // Protects the bucket and all the corresponding slabs - std::mutex BucketLock; - - // Reference to the allocator context, used access memory allocation - // routines, slab map and etc. - DisjointPool::AllocImpl &OwnAllocCtx; - - // For buckets used in chunked mode, a counter of slabs in the pool. - // For allocations that use an entire slab each, the entries in the Available - // list are entries in the pool.Each slab is available for a new - // allocation.The size of the Available list is the size of the pool. - // For allocations that use slabs in chunked mode, slabs will be in the - // Available list if any one or more of their chunks is free.The entire slab - // is not necessarily free, just some chunks in the slab are free. To - // implement pooling we will allow one slab in the Available list to be - // entirely empty. Normally such a slab would have been freed. But - // now we don't, and treat this slab as "in the pool". - // When a slab becomes entirely free we have to decide whether to return it - // to the provider or keep it allocated. 
A simple check for size of the - // Available list is not sufficient to check whether any slab has been - // pooled yet. We would have to traverse the entire Available list and check - // if any of them is entirely free. Instead we keep a counter of entirely - // empty slabs within the Available list to speed up the process of checking - // if a slab in this bucket is already pooled. - size_t chunkedSlabsInPool; - - // Statistics - size_t allocPoolCount; - size_t freeCount; - size_t currSlabsInUse; - size_t currSlabsInPool; - size_t maxSlabsInPool; - - public: - // Statistics - size_t allocCount; - size_t maxSlabsInUse; - - Bucket(size_t Sz, DisjointPool::AllocImpl &AllocCtx) - : Size{Sz}, OwnAllocCtx{AllocCtx}, chunkedSlabsInPool(0), - allocPoolCount(0), freeCount(0), currSlabsInUse(0), - currSlabsInPool(0), maxSlabsInPool(0), allocCount(0), - maxSlabsInUse(0) {} - - // Get pointer to allocation that is one piece of an available slab in this - // bucket. - void *getChunk(bool &FromPool); - - // Get pointer to allocation that is a full slab in this bucket. - void *getSlab(bool &FromPool); - - // Return the allocation size of this bucket. - size_t getSize() const { return Size; } - - // Free an allocation that is one piece of a slab in this bucket. - void freeChunk(void *Ptr, Slab &Slab, bool &ToPool); - - // Free an allocation that is a full slab in this bucket. - void freeSlab(Slab &Slab, bool &ToPool); - - umf_memory_provider_handle_t getMemHandle(); - - DisjointPool::AllocImpl &getAllocCtx() { return OwnAllocCtx; } - - // Check whether an allocation to be freed can be placed in the pool. - bool CanPool(bool &ToPool); - - // The minimum allocation size for any slab. - size_t SlabMinSize(); - - // The allocation size for a slab in this bucket. - size_t SlabAllocSize(); - - // The minimum size of a chunk from this bucket's slabs. - size_t ChunkCutOff(); - - // The number of slabs in this bucket that can be in the pool. - size_t Capacity(); - - // The maximum allocation size subject to pooling. - size_t MaxPoolableSize(); - - // Update allocation count - void countAlloc(bool FromPool); - - // Update free count - void countFree(); - - // Update statistics of Available/Unavailable - void updateStats(int InUse, int InPool); - - // Print bucket statistics - void printStats(bool &TitlePrinted, const std::string &Label); - - private: - void onFreeChunk(Slab &, bool &ToPool); - - // Update statistics of pool usage, and indicate that an allocation was made - // from the pool. - void decrementPool(bool &FromPool); - - // Get a slab to be used for chunked allocations. - decltype(AvailableSlabs.begin()) getAvailSlab(bool &FromPool); - - // Get a slab that will be used as a whole for a single allocation. - decltype(AvailableSlabs.begin()) getAvailFullSlab(bool &FromPool); -}; - -class DisjointPool::AllocImpl { - // It's important for the map to be destroyed last after buckets and their - // slabs This is because slab's destructor removes the object from the map. 
- std::unordered_multimap KnownSlabs; - std::shared_timed_mutex KnownSlabsMapLock; - - // Handle to the memory provider - umf_memory_provider_handle_t MemHandle; - - // Store as unique_ptrs since Bucket is not Movable(because of std::mutex) - std::vector> Buckets; - - // Configuration for this instance - umf_disjoint_pool_params_t params; - - umf_disjoint_pool_shared_limits_t DefaultSharedLimits = { - (std::numeric_limits::max)(), 0}; - - // Used in algorithm for finding buckets - std::size_t MinBucketSizeExp; - - // Coarse-grain allocation min alignment - size_t ProviderMinPageSize; - - public: - AllocImpl(umf_memory_provider_handle_t hProvider, - umf_disjoint_pool_params_handle_t params) - : MemHandle{hProvider}, params(*params) { - - VALGRIND_DO_CREATE_MEMPOOL(this, 0, 0); - - // deep copy of the Name - this->params.Name = new char[std::strlen(params->Name) + 1]; - std::strcpy(this->params.Name, params->Name); - - // Generate buckets sized such as: 64, 96, 128, 192, ..., CutOff. - // Powers of 2 and the value halfway between the powers of 2. - auto Size1 = this->params.MinBucketSize; - // MinBucketSize cannot be larger than CutOff. - Size1 = std::min(Size1, CutOff); - // Buckets sized smaller than the bucket default size- 8 aren't needed. - Size1 = std::max(Size1, UMF_DISJOINT_POOL_MIN_BUCKET_DEFAULT_SIZE); - // Calculate the exponent for MinBucketSize used for finding buckets. - MinBucketSizeExp = (size_t)log2Utils(Size1); - auto Size2 = Size1 + Size1 / 2; - for (; Size2 < CutOff; Size1 *= 2, Size2 *= 2) { - Buckets.push_back(std::make_unique(Size1, *this)); - Buckets.push_back(std::make_unique(Size2, *this)); - } - Buckets.push_back(std::make_unique(CutOff, *this)); - - auto ret = umfMemoryProviderGetMinPageSize(hProvider, nullptr, - &ProviderMinPageSize); - if (ret != UMF_RESULT_SUCCESS) { - ProviderMinPageSize = 0; - } - } - - ~AllocImpl() { - VALGRIND_DO_DESTROY_MEMPOOL(this); - delete[] this->params.Name; - } - - void *allocate(size_t Size, size_t Alignment, bool &FromPool); - void *allocate(size_t Size, bool &FromPool); - void deallocate(void *Ptr, bool &ToPool); - - umf_memory_provider_handle_t getMemHandle() { return MemHandle; } - - std::shared_timed_mutex &getKnownSlabsMapLock() { - return KnownSlabsMapLock; - } - std::unordered_multimap &getKnownSlabs() { - return KnownSlabs; - } - - size_t SlabMinSize() { return params.SlabMinSize; }; - - umf_disjoint_pool_params_t &getParams() { return params; } - - umf_disjoint_pool_shared_limits_t *getLimits() { - if (params.SharedLimits) { - return params.SharedLimits; - } else { - return &DefaultSharedLimits; - } - }; - - void printStats(bool &TitlePrinted, size_t &HighBucketSize, - size_t &HighPeakSlabsInUse, const std::string &Label); - - private: - Bucket &findBucket(size_t Size); - std::size_t sizeToIdx(size_t Size); -}; - -static void *memoryProviderAlloc(umf_memory_provider_handle_t hProvider, - size_t size, size_t alignment = 0) { - void *ptr; - auto ret = umfMemoryProviderAlloc(hProvider, size, alignment, &ptr); - if (ret != UMF_RESULT_SUCCESS) { - throw MemoryProviderError{ret}; - } - annotate_memory_inaccessible(ptr, size); - return ptr; -} - -static void memoryProviderFree(umf_memory_provider_handle_t hProvider, - void *ptr) { - size_t size = 0; - - if (ptr) { - umf_alloc_info_t allocInfo = {NULL, 0, NULL}; - umf_result_t umf_result = umfMemoryTrackerGetAllocInfo(ptr, &allocInfo); - if (umf_result == UMF_RESULT_SUCCESS) { - size = allocInfo.baseSize; - } - } - - auto ret = umfMemoryProviderFree(hProvider, ptr, size); - if 
(ret != UMF_RESULT_SUCCESS) { - throw MemoryProviderError{ret}; - } -} - -bool operator==(const Slab &Lhs, const Slab &Rhs) { - return Lhs.getPtr() == Rhs.getPtr(); -} - -std::ostream &operator<<(std::ostream &Os, const Slab &Slab) { - Os << "Slab<" << Slab.getPtr() << ", " << Slab.getEnd() << ", " - << Slab.getBucket().getSize() << ">"; - return Os; -} - -Slab::Slab(Bucket &Bkt) - : // In case bucket size is not a multiple of SlabMinSize, we would have - // some padding at the end of the slab. - Chunks(Bkt.SlabMinSize() / Bkt.getSize()), NumAllocated{0}, - bucket(Bkt), SlabListIter{}, FirstFreeChunkIdx{0} { - auto SlabSize = Bkt.SlabAllocSize(); - MemPtr = memoryProviderAlloc(Bkt.getMemHandle(), SlabSize); - regSlab(*this); -} - -Slab::~Slab() { - try { - unregSlab(*this); - } catch (std::exception &e) { - LOG_ERR("DisjointPool: unexpected error: %s", e.what()); - } - - try { - memoryProviderFree(bucket.getMemHandle(), MemPtr); - } catch (MemoryProviderError &e) { - LOG_ERR("DisjointPool: error from memory provider: %d", e.code); - - if (e.code == UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC) { - const char *message = ""; - int error = 0; - - try { - umfMemoryProviderGetLastNativeError( - umfGetLastFailedMemoryProvider(), &message, &error); - LOG_ERR("Native error msg: %s, native error code: %d", message, - error); - } catch (...) { - // ignore any additional errors from logger - } - } - } -} - -// Return the index of the first available chunk, SIZE_MAX otherwise -size_t Slab::FindFirstAvailableChunkIdx() const { - // Use the first free chunk index as a hint for the search. - auto It = std::find_if(Chunks.begin() + FirstFreeChunkIdx, Chunks.end(), - [](auto x) { return !x; }); - if (It != Chunks.end()) { - return It - Chunks.begin(); - } - - return std::numeric_limits::max(); -} - -void *Slab::getChunk() { - // assert(NumAllocated != Chunks.size()); - - const size_t ChunkIdx = FindFirstAvailableChunkIdx(); - // Free chunk must exist, otherwise we would have allocated another slab - assert(ChunkIdx != (std::numeric_limits::max())); - - void *const FreeChunk = - (static_cast(getPtr())) + ChunkIdx * getChunkSize(); - Chunks[ChunkIdx] = true; - NumAllocated += 1; - - // Use the found index as the next hint - FirstFreeChunkIdx = ChunkIdx; - - return FreeChunk; -} - -void *Slab::getSlab() { return getPtr(); } - -Bucket &Slab::getBucket() { return bucket; } -const Bucket &Slab::getBucket() const { return bucket; } - -size_t Slab::getChunkSize() const { return bucket.getSize(); } - -void Slab::regSlabByAddr(void *Addr, Slab &Slab) { - auto &Lock = Slab.getBucket().getAllocCtx().getKnownSlabsMapLock(); - auto &Map = Slab.getBucket().getAllocCtx().getKnownSlabs(); - - std::lock_guard Lg(Lock); - Map.insert({Addr, Slab}); -} - -void Slab::unregSlabByAddr(void *Addr, Slab &Slab) { - auto &Lock = Slab.getBucket().getAllocCtx().getKnownSlabsMapLock(); - auto &Map = Slab.getBucket().getAllocCtx().getKnownSlabs(); - - std::lock_guard Lg(Lock); - - auto Slabs = Map.equal_range(Addr); - // At least the must get the current slab from the map. 
- assert(Slabs.first != Slabs.second && "Slab is not found"); - - for (auto It = Slabs.first; It != Slabs.second; ++It) { - if (It->second == Slab) { - Map.erase(It); - return; - } - } - - assert(false && "Slab is not found"); -} - -void Slab::regSlab(Slab &Slab) { - void *StartAddr = AlignPtrDown(Slab.getPtr(), bucket.SlabMinSize()); - void *EndAddr = static_cast(StartAddr) + bucket.SlabMinSize(); - - regSlabByAddr(StartAddr, Slab); - regSlabByAddr(EndAddr, Slab); -} - -void Slab::unregSlab(Slab &Slab) { - void *StartAddr = AlignPtrDown(Slab.getPtr(), bucket.SlabMinSize()); - void *EndAddr = static_cast(StartAddr) + bucket.SlabMinSize(); - - unregSlabByAddr(StartAddr, Slab); - unregSlabByAddr(EndAddr, Slab); -} - -void Slab::freeChunk(void *Ptr) { - // This method should be called through bucket(since we might remove the slab - // as a result), therefore all locks are done on that level. - - // Make sure that we're in the right slab - assert(Ptr >= getPtr() && Ptr < getEnd()); - - // Even if the pointer p was previously aligned, it's still inside the - // corresponding chunk, so we get the correct index here. - auto ChunkIdx = (static_cast(Ptr) - static_cast(MemPtr)) / - getChunkSize(); - - // Make sure that the chunk was allocated - assert(Chunks[ChunkIdx] && "double free detected"); - - Chunks[ChunkIdx] = false; - NumAllocated -= 1; - - if (ChunkIdx < FirstFreeChunkIdx) { - FirstFreeChunkIdx = ChunkIdx; - } -} - -void *Slab::getEnd() const { - return static_cast(getPtr()) + bucket.SlabMinSize(); -} - -bool Slab::hasAvail() { return NumAllocated != getNumChunks(); } - -// If a slab was available in the pool then note that the current pooled -// size has reduced by the size of a slab in this bucket. -void Bucket::decrementPool(bool &FromPool) { - FromPool = true; - updateStats(1, -1); - OwnAllocCtx.getLimits()->TotalSize -= SlabAllocSize(); -} - -auto Bucket::getAvailFullSlab(bool &FromPool) - -> decltype(AvailableSlabs.begin()) { - // Return a slab that will be used for a single allocation. - if (AvailableSlabs.size() == 0) { - auto It = AvailableSlabs.insert(AvailableSlabs.begin(), - std::make_unique(*this)); - (*It)->setIterator(It); - FromPool = false; - updateStats(1, 0); - } else { - decrementPool(FromPool); - } - - return AvailableSlabs.begin(); -} - -void *Bucket::getSlab(bool &FromPool) { - std::lock_guard Lg(BucketLock); - - auto SlabIt = getAvailFullSlab(FromPool); - auto *FreeSlab = (*SlabIt)->getSlab(); - auto It = - UnavailableSlabs.insert(UnavailableSlabs.begin(), std::move(*SlabIt)); - AvailableSlabs.erase(SlabIt); - (*It)->setIterator(It); - return FreeSlab; -} - -void Bucket::freeSlab(Slab &Slab, bool &ToPool) { - std::lock_guard Lg(BucketLock); - auto SlabIter = Slab.getIterator(); - assert(SlabIter != UnavailableSlabs.end()); - if (CanPool(ToPool)) { - auto It = - AvailableSlabs.insert(AvailableSlabs.begin(), std::move(*SlabIter)); - UnavailableSlabs.erase(SlabIter); - (*It)->setIterator(It); - } else { - UnavailableSlabs.erase(SlabIter); - } -} - -auto Bucket::getAvailSlab(bool &FromPool) -> decltype(AvailableSlabs.begin()) { - - if (AvailableSlabs.size() == 0) { - auto It = AvailableSlabs.insert(AvailableSlabs.begin(), - std::make_unique(*this)); - (*It)->setIterator(It); - - updateStats(1, 0); - FromPool = false; - } else { - if ((*(AvailableSlabs.begin()))->getNumAllocated() == 0) { - // If this was an empty slab, it was in the pool. - // Now it is no longer in the pool, so update count. 
- --chunkedSlabsInPool; - decrementPool(FromPool); - } else { - // Allocation from existing slab is treated as from pool for statistics. - FromPool = true; - } - } - - return AvailableSlabs.begin(); -} - -void *Bucket::getChunk(bool &FromPool) { - std::lock_guard Lg(BucketLock); - - auto SlabIt = getAvailSlab(FromPool); - auto *FreeChunk = (*SlabIt)->getChunk(); - - // If the slab is full, move it to unavailable slabs and update its iterator - if (!((*SlabIt)->hasAvail())) { - auto It = UnavailableSlabs.insert(UnavailableSlabs.begin(), - std::move(*SlabIt)); - AvailableSlabs.erase(SlabIt); - (*It)->setIterator(It); - } - - return FreeChunk; -} - -void Bucket::freeChunk(void *Ptr, Slab &Slab, bool &ToPool) { - std::lock_guard Lg(BucketLock); - - Slab.freeChunk(Ptr); - - onFreeChunk(Slab, ToPool); -} - -// The lock must be acquired before calling this method -void Bucket::onFreeChunk(Slab &Slab, bool &ToPool) { - ToPool = true; - - // In case if the slab was previously full and now has 1 available - // chunk, it should be moved to the list of available slabs - if (Slab.getNumAllocated() == (Slab.getNumChunks() - 1)) { - auto SlabIter = Slab.getIterator(); - assert(SlabIter != UnavailableSlabs.end()); - - auto It = - AvailableSlabs.insert(AvailableSlabs.begin(), std::move(*SlabIter)); - UnavailableSlabs.erase(SlabIter); - - (*It)->setIterator(It); - } - - // Check if slab is empty, and pool it if we can. - if (Slab.getNumAllocated() == 0) { - // The slab is now empty. - // If pool has capacity then put the slab in the pool. - // The ToPool parameter indicates whether the Slab will be put in the - // pool or freed. - if (!CanPool(ToPool)) { - // Note: since the slab is stored as unique_ptr, just remove it from - // the list to destroy the object. - auto It = Slab.getIterator(); - assert(It != AvailableSlabs.end()); - AvailableSlabs.erase(It); - } - } -} - -bool Bucket::CanPool(bool &ToPool) { - size_t NewFreeSlabsInBucket; - // Check if this bucket is used in chunked form or as full slabs. - bool chunkedBucket = getSize() <= ChunkCutOff(); - if (chunkedBucket) { - NewFreeSlabsInBucket = chunkedSlabsInPool + 1; - } else { - NewFreeSlabsInBucket = AvailableSlabs.size() + 1; - } - if (Capacity() >= NewFreeSlabsInBucket) { - size_t PoolSize = OwnAllocCtx.getLimits()->TotalSize; - while (true) { - size_t NewPoolSize = PoolSize + SlabAllocSize(); - - if (OwnAllocCtx.getLimits()->MaxSize < NewPoolSize) { - break; - } - - if (OwnAllocCtx.getLimits()->TotalSize.compare_exchange_strong( - PoolSize, NewPoolSize)) { - if (chunkedBucket) { - ++chunkedSlabsInPool; - } - - updateStats(-1, 1); - ToPool = true; - return true; - } - } - } - - updateStats(-1, 0); - ToPool = false; - return false; -} - -umf_memory_provider_handle_t Bucket::getMemHandle() { - return OwnAllocCtx.getMemHandle(); -} - -size_t Bucket::SlabMinSize() { return OwnAllocCtx.getParams().SlabMinSize; } - -size_t Bucket::SlabAllocSize() { return std::max(getSize(), SlabMinSize()); } - -size_t Bucket::Capacity() { - // For buckets used in chunked mode, just one slab in pool is sufficient. - // For larger buckets, the capacity could be more and is adjustable. 
- if (getSize() <= ChunkCutOff()) { - return 1; - } else { - return OwnAllocCtx.getParams().Capacity; - } -} - -size_t Bucket::MaxPoolableSize() { - return OwnAllocCtx.getParams().MaxPoolableSize; -} - -size_t Bucket::ChunkCutOff() { return SlabMinSize() / 2; } - -void Bucket::countAlloc(bool FromPool) { - ++allocCount; - if (FromPool) { - ++allocPoolCount; - } -} - -void Bucket::countFree() { ++freeCount; } - -void Bucket::updateStats(int InUse, int InPool) { - if (OwnAllocCtx.getParams().PoolTrace == 0) { - return; - } - currSlabsInUse += InUse; - maxSlabsInUse = std::max(currSlabsInUse, maxSlabsInUse); - currSlabsInPool += InPool; - maxSlabsInPool = std::max(currSlabsInPool, maxSlabsInPool); - // Increment or decrement current pool sizes based on whether - // slab was added to or removed from pool. - OwnAllocCtx.getParams().CurPoolSize += InPool * SlabAllocSize(); -} - -void Bucket::printStats(bool &TitlePrinted, const std::string &Label) { - if (allocCount) { - if (!TitlePrinted) { - std::cout << Label << " memory statistics\n"; - std::cout << std::setw(14) << "Bucket Size" << std::setw(12) - << "Allocs" << std::setw(12) << "Frees" << std::setw(18) - << "Allocs from Pool" << std::setw(20) - << "Peak Slabs in Use" << std::setw(21) - << "Peak Slabs in Pool" << std::endl; - TitlePrinted = true; - } - std::cout << std::setw(14) << getSize() << std::setw(12) << allocCount - << std::setw(12) << freeCount << std::setw(18) - << allocPoolCount << std::setw(20) << maxSlabsInUse - << std::setw(21) << maxSlabsInPool << std::endl; - } -} - -void *DisjointPool::AllocImpl::allocate(size_t Size, bool &FromPool) try { - void *Ptr; - - if (Size == 0) { - return nullptr; - } - - FromPool = false; - if (Size > getParams().MaxPoolableSize) { - Ptr = memoryProviderAlloc(getMemHandle(), Size); - annotate_memory_undefined(Ptr, Size); - return Ptr; - } - - auto &Bucket = findBucket(Size); - - if (Size > Bucket.ChunkCutOff()) { - Ptr = Bucket.getSlab(FromPool); - } else { - Ptr = Bucket.getChunk(FromPool); - } - - if (getParams().PoolTrace > 1) { - Bucket.countAlloc(FromPool); - } - - VALGRIND_DO_MEMPOOL_ALLOC(this, Ptr, Size); - annotate_memory_undefined(Ptr, Bucket.getSize()); - - return Ptr; -} catch (MemoryProviderError &e) { - umf::getPoolLastStatusRef() = e.code; - return nullptr; -} - -void *DisjointPool::AllocImpl::allocate(size_t Size, size_t Alignment, - bool &FromPool) try { - void *Ptr; - - if (Size == 0) { - return nullptr; - } - - if (Alignment <= 1) { - return allocate(Size, FromPool); - } - - size_t AlignedSize; - if (Alignment <= ProviderMinPageSize) { - // This allocation will be served from a Bucket which size is multiple - // of Alignment and Slab address is aligned to ProviderMinPageSize - // so the address will be properly aligned. - AlignedSize = (Size > 1) ? AlignUp(Size, Alignment) : Alignment; - } else { - // Slabs are only aligned to ProviderMinPageSize, we need to compensate - // for that in case the allocation is within pooling limit. - // TODO: consider creating properly-aligned Slabs on demand - AlignedSize = Size + Alignment - 1; - } - - // Check if requested allocation size is within pooling limit. - // If not, just request aligned pointer from the system. 
- FromPool = false; - if (AlignedSize > getParams().MaxPoolableSize) { - Ptr = memoryProviderAlloc(getMemHandle(), Size, Alignment); - annotate_memory_undefined(Ptr, Size); - return Ptr; - } - - auto &Bucket = findBucket(AlignedSize); - - if (AlignedSize > Bucket.ChunkCutOff()) { - Ptr = Bucket.getSlab(FromPool); - } else { - Ptr = Bucket.getChunk(FromPool); - } - - if (getParams().PoolTrace > 1) { - Bucket.countAlloc(FromPool); - } - - VALGRIND_DO_MEMPOOL_ALLOC(this, AlignPtrUp(Ptr, Alignment), Size); - annotate_memory_undefined(AlignPtrUp(Ptr, Alignment), Size); - return AlignPtrUp(Ptr, Alignment); -} catch (MemoryProviderError &e) { - umf::getPoolLastStatusRef() = e.code; - return nullptr; -} - -std::size_t DisjointPool::AllocImpl::sizeToIdx(size_t Size) { - assert(Size <= CutOff && "Unexpected size"); - assert(Size > 0 && "Unexpected size"); - - size_t MinBucketSize = (size_t)1 << MinBucketSizeExp; - if (Size < MinBucketSize) { - return 0; - } - - // Get the position of the leftmost set bit. - size_t position = getLeftmostSetBitPos(Size); - - auto isPowerOf2 = 0 == (Size & (Size - 1)); - auto largerThanHalfwayBetweenPowersOf2 = - !isPowerOf2 && bool((Size - 1) & (uint64_t(1) << (position - 1))); - auto index = (position - MinBucketSizeExp) * 2 + (int)(!isPowerOf2) + - (int)largerThanHalfwayBetweenPowersOf2; - - return index; -} - -Bucket &DisjointPool::AllocImpl::findBucket(size_t Size) { - auto calculatedIdx = sizeToIdx(Size); - assert((*(Buckets[calculatedIdx])).getSize() >= Size); - if (calculatedIdx > 0) { - assert((*(Buckets[calculatedIdx - 1])).getSize() < Size); - } - - return *(Buckets[calculatedIdx]); -} - -void DisjointPool::AllocImpl::deallocate(void *Ptr, bool &ToPool) { - auto *SlabPtr = AlignPtrDown(Ptr, SlabMinSize()); - - // Lock the map on read - std::shared_lock Lk(getKnownSlabsMapLock()); - - ToPool = false; - auto Slabs = getKnownSlabs().equal_range(SlabPtr); - if (Slabs.first == Slabs.second) { - Lk.unlock(); - memoryProviderFree(getMemHandle(), Ptr); - return; - } - - for (auto It = Slabs.first; It != Slabs.second; ++It) { - // The slab object won't be deleted until it's removed from the map which is - // protected by the lock, so it's safe to access it here. - auto &Slab = It->second; - if (Ptr >= Slab.getPtr() && Ptr < Slab.getEnd()) { - // Unlock the map before freeing the chunk, it may be locked on write - // there - Lk.unlock(); - auto &Bucket = Slab.getBucket(); - - if (getParams().PoolTrace > 1) { - Bucket.countFree(); - } - - VALGRIND_DO_MEMPOOL_FREE(this, Ptr); - annotate_memory_inaccessible(Ptr, Bucket.getSize()); - if (Bucket.getSize() <= Bucket.ChunkCutOff()) { - Bucket.freeChunk(Ptr, Slab, ToPool); - } else { - Bucket.freeSlab(Slab, ToPool); - } - - return; - } - } - - Lk.unlock(); - // There is a rare case when we have a pointer from system allocation next - // to some slab with an entry in the map. So we find a slab - // but the range checks fail. 
- memoryProviderFree(getMemHandle(), Ptr); -} - -void DisjointPool::AllocImpl::printStats(bool &TitlePrinted, - size_t &HighBucketSize, - size_t &HighPeakSlabsInUse, - const std::string &MTName) { - HighBucketSize = 0; - HighPeakSlabsInUse = 0; - for (auto &B : Buckets) { - (*B).printStats(TitlePrinted, MTName); - HighPeakSlabsInUse = std::max((*B).maxSlabsInUse, HighPeakSlabsInUse); - if ((*B).allocCount) { - HighBucketSize = std::max((*B).SlabAllocSize(), HighBucketSize); - } - } -} - -umf_result_t -DisjointPool::initialize(umf_memory_provider_handle_t provider, - umf_disjoint_pool_params_handle_t parameters) { - if (!provider) { - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - // MinBucketSize parameter must be a power of 2 for bucket sizes - // to generate correctly. - if (!parameters->MinBucketSize || - !((parameters->MinBucketSize & (parameters->MinBucketSize - 1)) == 0)) { - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - impl = std::make_unique(provider, parameters); - return UMF_RESULT_SUCCESS; -} - -void *DisjointPool::malloc(size_t size) { // For full-slab allocations indicates - // whether slab is from Pool. - bool FromPool; - auto Ptr = impl->allocate(size, FromPool); - - if (impl->getParams().PoolTrace > 2) { - auto MT = impl->getParams().Name; - std::cout << "Allocated " << std::setw(8) << size << " " << MT - << " bytes from " << (FromPool ? "Pool" : "Provider") << " ->" - << Ptr << std::endl; - } - return Ptr; -} - -void *DisjointPool::calloc(size_t, size_t) { - // Not supported - umf::getPoolLastStatusRef() = UMF_RESULT_ERROR_NOT_SUPPORTED; - return NULL; -} - -void *DisjointPool::realloc(void *, size_t) { - // Not supported - umf::getPoolLastStatusRef() = UMF_RESULT_ERROR_NOT_SUPPORTED; - return NULL; -} - -void *DisjointPool::aligned_malloc(size_t size, size_t alignment) { - bool FromPool; - auto Ptr = impl->allocate(size, alignment, FromPool); - - if (impl->getParams().PoolTrace > 2) { - auto MT = impl->getParams().Name; - std::cout << "Allocated " << std::setw(8) << size << " " << MT - << " bytes aligned at " << alignment << " from " - << (FromPool ? "Pool" : "Provider") << " ->" << Ptr - << std::endl; - } - return Ptr; -} - -size_t DisjointPool::malloc_usable_size(void *) { - // Not supported - return 0; -} - -umf_result_t DisjointPool::free(void *ptr) try { - bool ToPool; - impl->deallocate(ptr, ToPool); - - if (impl->getParams().PoolTrace > 2) { - auto MT = impl->getParams().Name; - std::cout << "Freed " << MT << " " << ptr << " to " - << (ToPool ? 
"Pool" : "Provider") - << ", Current total pool size " - << impl->getLimits()->TotalSize.load() - << ", Current pool size for " << MT << " " - << impl->getParams().CurPoolSize << "\n"; - } - return UMF_RESULT_SUCCESS; -} catch (MemoryProviderError &e) { - return e.code; -} - -umf_result_t DisjointPool::get_last_allocation_error() { - return umf::getPoolLastStatusRef(); -} - -DisjointPool::DisjointPool() {} - -// Define destructor for use with unique_ptr -DisjointPool::~DisjointPool() { - bool TitlePrinted = false; - size_t HighBucketSize; - size_t HighPeakSlabsInUse; - if (impl->getParams().PoolTrace > 1) { - auto name = impl->getParams().Name; - try { // cannot throw in destructor - impl->printStats(TitlePrinted, HighBucketSize, HighPeakSlabsInUse, - name); - if (TitlePrinted) { - std::cout << "Current Pool Size " - << impl->getLimits()->TotalSize.load() << std::endl; - std::cout << "Suggested Setting=;" - << std::string(1, (char)tolower(name[0])) - << std::string(name + 1) << ":" << HighBucketSize - << "," << HighPeakSlabsInUse << ",64K" << std::endl; - } - } catch (...) { // ignore exceptions - } - } -} - -static umf_memory_pool_ops_t UMF_DISJOINT_POOL_OPS = - umf::poolMakeCOps(); - -umf_memory_pool_ops_t *umfDisjointPoolOps(void) { - return &UMF_DISJOINT_POOL_OPS; -} diff --git a/src/pool/pool_disjoint_internal.h b/src/pool/pool_disjoint_internal.h new file mode 100644 index 000000000..25d3729e1 --- /dev/null +++ b/src/pool/pool_disjoint_internal.h @@ -0,0 +1,221 @@ +/* + * Copyright (C) 2024 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +*/ + +#ifndef UMF_POOL_DISJOINT_INTERNAL_H +#define UMF_POOL_DISJOINT_INTERNAL_H 1 + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "critnib/critnib.h" +#include "uthash/utlist.h" + +#include "base_alloc_global.h" +#include "provider/provider_tracking.h" +#include "utils_common.h" +#include "utils_concurrency.h" +#include "utils_log.h" +#include "utils_math.h" +#include "utils_sanitizers.h" + +typedef struct bucket_t bucket_t; +typedef struct slab_t slab_t; +typedef struct slab_list_item_t slab_list_item_t; +typedef struct disjoint_pool_t disjoint_pool_t; + +typedef struct bucket_t { + size_t size; + + // Linked list of slabs which have at least 1 available chunk. + // We always count available slabs as an optimization. + slab_list_item_t *available_slabs; + size_t available_slabs_num; + + // Linked list of slabs with 0 available chunk. + slab_list_item_t *unavailable_slabs; + + // Protects the bucket and all the corresponding slabs + utils_mutex_t bucket_lock; + + // Reference to the allocator context, used access memory allocation + // routines, slab map and etc. + disjoint_pool_t *pool; + + umf_disjoint_pool_shared_limits_handle_t shared_limits; + + // For buckets used in chunked mode, a counter of slabs in the pool. + // For allocations that use an entire slab each, the entries in the Available + // list are entries in the pool.Each slab is available for a new + // allocation.The size of the Available list is the size of the pool. + // For allocations that use slabs in chunked mode, slabs will be in the + // Available list if any one or more of their chunks is free.The entire slab + // is not necessarily free, just some chunks in the slab are free. To + // implement pooling we will allow one slab in the Available list to be + // entirely empty. 
Normally such a slab would have been freed. But + // now we don't, and treat this slab as "in the pool". + // When a slab becomes entirely free we have to decide whether to return it + // to the provider or keep it allocated. A simple check for the size of the + // Available list is not sufficient to check whether any slab has been + // pooled yet. We would have to traverse the entire Available list and check + // if any of them is entirely free. Instead we keep a counter of entirely + // empty slabs within the Available list to speed up the process of checking + // if a slab in this bucket is already pooled. + size_t chunked_slabs_in_pool; + + // Statistics + size_t alloc_pool_count; + size_t free_count; + size_t curr_slabs_in_use; + size_t curr_slabs_in_pool; + size_t max_slabs_in_pool; + size_t alloc_count; + size_t max_slabs_in_use; +} bucket_t; + +// Represents the allocated memory block of size 'slab_min_size' +// Internally, it splits the memory block into chunks. The number of +// chunks depends on the size of the Bucket which created the Slab. +// Note: Bucket's methods are responsible for thread safety of Slab access, +// so no locking happens here. +typedef struct slab_t { + // Pointer to the allocated memory of slab_min_size bytes + void *mem_ptr; + size_t slab_size; + + // Represents the current state of each chunk: if the bit is set then the + // chunk is allocated, otherwise the chunk is free for allocation + bool *chunks; + size_t num_chunks; + + // Total number of allocated chunks at the moment. + size_t num_allocated; + + // The bucket which the slab belongs to + bucket_t *bucket; + + // Hint where to start the search for a free chunk in a slab + size_t first_free_chunk_idx; + + // Store iterator to the corresponding node in avail/unavail list + // to achieve O(1) removal + slab_list_item_t *iter; +} slab_t; + +typedef struct slab_list_item_t { + slab_t *val; + struct slab_list_item_t *prev, *next; +} slab_list_item_t; + +typedef struct umf_disjoint_pool_shared_limits_t { + size_t max_size; + size_t total_size; // requires atomic access +} umf_disjoint_pool_shared_limits_t; + +typedef struct umf_disjoint_pool_params_t { + // Minimum allocation size that will be requested from the memory provider. + size_t slab_min_size; + + // Allocations up to this limit will be subject to chunking/pooling + size_t max_poolable_size; + + // When pooling, each bucket will hold a max of 'capacity' unfreed slabs + size_t capacity; + + // Holds the minimum bucket size valid for allocation of a memory type. + // This value must be a power of 2. + size_t min_bucket_size; + + // Holds size of the pool managed by the allocator. + size_t cur_pool_size; + + // Whether to print pool usage statistics + int pool_trace; + + // Memory limits that can be shared between multiple pool instances, + // i.e. if multiple pools use the same shared_limits, the sum of those pools' + // sizes cannot exceed max_size. + umf_disjoint_pool_shared_limits_handle_t shared_limits; + + // Name used in traces + char *name; +} umf_disjoint_pool_params_t; + +typedef struct disjoint_pool_t { + // It's important for the map to be destroyed last, after buckets and their + // slabs. This is because the slab's destructor removes the object from the map. + critnib *known_slabs; // (void *, slab_t *) + + // TODO: prev std::shared_timed_mutex - ok?
+ utils_mutex_t known_slabs_map_lock; + + // Handle to the memory provider + umf_memory_provider_handle_t provider; + + // Array of bucket_t* + bucket_t **buckets; + size_t buckets_num; + + // Configuration for this instance + umf_disjoint_pool_params_t params; + + umf_disjoint_pool_shared_limits_handle_t default_shared_limits; + + // Used in algorithm for finding buckets + size_t min_bucket_size_exp; + + // Coarse-grain allocation min alignment + size_t provider_min_page_size; +} disjoint_pool_t; + +slab_t *create_slab(bucket_t *bucket); +void destroy_slab(slab_t *slab); + +void *slab_get(const slab_t *slab); +void *slab_get_end(const slab_t *slab); +void *slab_get_chunk(slab_t *slab); + +bool slab_has_avail(const slab_t *slab); +void slab_free_chunk(slab_t *slab, void *ptr); + +void slab_reg(slab_t *slab); +void slab_reg_by_addr(void *addr, slab_t *slab); +void slab_unreg(slab_t *slab); +void slab_unreg_by_addr(void *addr, slab_t *slab); + +bucket_t *create_bucket(size_t sz, disjoint_pool_t *pool, + umf_disjoint_pool_shared_limits_t *shared_limits); +void destroy_bucket(bucket_t *bucket); + +void bucket_update_stats(bucket_t *bucket, int in_use, int in_pool); +bool bucket_can_pool(bucket_t *bucket, bool *to_pool); +void bucket_decrement_pool(bucket_t *bucket, bool *from_pool); +void *bucket_get_chunk(bucket_t *bucket, bool *from_pool); +size_t bucket_chunk_cut_off(bucket_t *bucket); +size_t bucket_capacity(bucket_t *bucket); +void bucket_free_chunk(bucket_t *bucket, void *ptr, slab_t *slab, + bool *to_pool); +void bucket_count_alloc(bucket_t *bucket, bool from_pool); + +void *bucket_get_slab(bucket_t *bucket, bool *from_pool); +size_t bucket_slab_alloc_size(bucket_t *bucket); +size_t bucket_slab_min_size(bucket_t *bucket); +slab_list_item_t *bucket_get_avail_slab(bucket_t *bucket, bool *from_pool); +slab_list_item_t *bucket_get_avail_full_slab(bucket_t *bucket, bool *from_pool); +void bucket_free_slab(bucket_t *bucket, slab_t *slab, bool *to_pool); + +bucket_t *disjoint_pool_find_bucket(disjoint_pool_t *pool, size_t size); + +#endif // UMF_POOL_DISJOINT_INTERNAL_H diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index d5a07bfbb..fe0ab6154 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -58,10 +58,6 @@ function(build_umf_test) set(CPL_DEFS ${CPL_DEFS} UMF_POOL_SCALABLE_ENABLED=1) endif() - if(UMF_BUILD_LIBUMF_POOL_DISJOINT) - set(CPL_DEFS ${CPL_DEFS} UMF_POOL_DISJOINT_ENABLED=1) - endif() - set(TEST_LIBS umf_test_common ${ARG_LIBS} @@ -164,10 +160,6 @@ if(UMF_BUILD_SHARED_LIBRARY) endif() endif() -if(UMF_BUILD_LIBUMF_POOL_DISJOINT) - set(LIB_DISJOINT_POOL disjoint_pool) -endif() - if(UMF_BUILD_SHARED_LIBRARY) # if build as shared library, ba symbols won't be visible in tests set(BA_SOURCES_FOR_TEST ${BA_SOURCES}) @@ -204,32 +196,29 @@ add_umf_test( SRCS coarse_lib.cpp ${BA_SOURCES_FOR_TEST} LIBS ${UMF_UTILS_FOR_TEST} coarse) -if(UMF_BUILD_LIBUMF_POOL_DISJOINT) - add_umf_test( - NAME disjointPool - SRCS pools/disjoint_pool.cpp malloc_compliance_tests.cpp - LIBS disjoint_pool) +add_umf_test( + NAME disjointPool + SRCS pools/disjoint_pool.cpp malloc_compliance_tests.cpp + ${BA_SOURCES_FOR_TEST} + LIBS ${UMF_UTILS_FOR_TEST}) + +add_umf_test( + NAME c_api_disjoint_pool + SRCS c_api/disjoint_pool.c ${BA_SOURCES_FOR_TEST} + LIBS ${UMF_UTILS_FOR_TEST}) + +if(LINUX AND (NOT UMF_DISABLE_HWLOC)) + # this test uses the file provider add_umf_test( - NAME c_api_disjoint_pool - SRCS c_api/disjoint_pool.c - LIBS disjoint_pool) - if(LINUX AND (NOT UMF_DISABLE_HWLOC)) - # this test uses 
the file provider - add_umf_test( - NAME disjointPoolFileProv - SRCS disjointPoolFileProv.cpp - LIBS disjoint_pool) - endif() + NAME disjointPoolFileProv + SRCS disjointPoolFileProv.cpp ${BA_SOURCES_FOR_TEST} + LIBS ${UMF_UTILS_FOR_TEST}) endif() -if(UMF_BUILD_LIBUMF_POOL_DISJOINT - AND UMF_POOL_JEMALLOC_ENABLED +if(UMF_POOL_JEMALLOC_ENABLED AND UMF_POOL_SCALABLE_ENABLED AND (NOT UMF_DISABLE_HWLOC)) - add_umf_test( - NAME c_api_multi_pool - SRCS c_api/multi_pool.c - LIBS disjoint_pool) + add_umf_test(NAME c_api_multi_pool SRCS c_api/multi_pool.c) endif() if(UMF_POOL_JEMALLOC_ENABLED AND (NOT UMF_DISABLE_HWLOC)) @@ -260,7 +249,7 @@ if(LINUX AND (NOT UMF_DISABLE_HWLOC)) # OS-specific functions are implemented add_umf_test( NAME provider_os_memory SRCS provider_os_memory.cpp ${BA_SOURCES_FOR_TEST} - LIBS ${UMF_UTILS_FOR_TEST} ${LIB_DISJOINT_POOL}) + LIBS ${UMF_UTILS_FOR_TEST}) add_umf_test( NAME provider_os_memory_multiple_numa_nodes SRCS provider_os_memory_multiple_numa_nodes.cpp @@ -575,9 +564,7 @@ if(LINUX) # TODO add IPC tests for CUDA - if(UMF_BUILD_GPU_TESTS - AND UMF_BUILD_LEVEL_ZERO_PROVIDER - AND UMF_BUILD_LIBUMF_POOL_DISJOINT) + if(UMF_BUILD_GPU_TESTS AND UMF_BUILD_LEVEL_ZERO_PROVIDER) build_umf_test( NAME ipc_level_zero_prov_consumer @@ -588,7 +575,6 @@ if(LINUX) ${UMF_UTILS_DIR}/utils_level_zero.cpp LIBS ze_loader - disjoint_pool ${UMF_UTILS_FOR_TEST}) build_umf_test( NAME @@ -600,14 +586,11 @@ if(LINUX) ${UMF_UTILS_DIR}/utils_level_zero.cpp LIBS ze_loader - disjoint_pool ${UMF_UTILS_FOR_TEST}) add_umf_ipc_test(TEST ipc_level_zero_prov SRC_DIR providers) endif() - if(UMF_BUILD_GPU_TESTS - AND UMF_BUILD_CUDA_PROVIDER - AND UMF_BUILD_LIBUMF_POOL_DISJOINT) + if(UMF_BUILD_GPU_TESTS AND UMF_BUILD_CUDA_PROVIDER) build_umf_test( NAME ipc_cuda_prov_consumer @@ -618,7 +601,6 @@ if(LINUX) providers/cuda_helpers.cpp LIBS cuda - disjoint_pool ${UMF_UTILS_FOR_TEST}) build_umf_test( NAME @@ -630,7 +612,6 @@ if(LINUX) providers/cuda_helpers.cpp LIBS cuda - disjoint_pool ${UMF_UTILS_FOR_TEST}) add_umf_ipc_test(TEST ipc_cuda_prov SRC_DIR providers) endif() @@ -684,41 +665,34 @@ if(LINUX ) endif() - if(UMF_BUILD_GPU_EXAMPLES - AND UMF_BUILD_LIBUMF_POOL_DISJOINT - AND UMF_BUILD_LEVEL_ZERO_PROVIDER) + if(UMF_BUILD_GPU_EXAMPLES AND UMF_BUILD_LEVEL_ZERO_PROVIDER) set(EXAMPLES ${EXAMPLES} level_zero_shared_memory) else() message( STATUS - "GPU level zero shared memory example requires UMF_BUILD_GPU_EXAMPLES, " - "UMF_BUILD_LEVEL_ZERO_PROVIDER and UMF_BUILD_LIBUMF_POOL_DISJOINT " - "to be turned ON - skipping") + "GPU level zero shared memory example requires UMF_BUILD_GPU_EXAMPLES and " + "UMF_BUILD_LEVEL_ZERO_PROVIDER to be turned ON - skipping") endif() if(UMF_BUILD_GPU_EXAMPLES - AND UMF_BUILD_LIBUMF_POOL_DISJOINT AND UMF_BUILD_CUDA_PROVIDER AND UMF_CUDA_ENABLED) set(EXAMPLES ${EXAMPLES} cuda_shared_memory) else() message( STATUS - "GPU CUDA shared memory example requires UMF_BUILD_GPU_EXAMPLES, " - "UMF_BUILD_CUDA_PROVIDER, UMF_BUILD_LIBUMF_POOL_DISJOINT " - "to be turned ON and installed CUDA libraries - skipping") + "GPU CUDA shared memory example requires UMF_BUILD_GPU_EXAMPLES " + "and UMF_BUILD_CUDA_PROVIDER to be turned ON and installed CUDA " + "libraries - skipping") endif() # TODO add IPC examples for CUDA - if(UMF_BUILD_GPU_EXAMPLES - AND UMF_BUILD_LIBUMF_POOL_DISJOINT - AND UMF_BUILD_LEVEL_ZERO_PROVIDER) + if(UMF_BUILD_GPU_EXAMPLES AND UMF_BUILD_LEVEL_ZERO_PROVIDER) set(EXAMPLES ${EXAMPLES} ipc_level_zero) else() message( - STATUS - "IPC Level 0 example requires UMF_BUILD_GPU_EXAMPLES, 
UMF_BUILD_LEVEL_ZERO_PROVIDER and UMF_BUILD_LIBUMF_POOL_DISJOINT to be turned ON - skipping" - ) + STATUS "IPC Level 0 example requires UMF_BUILD_GPU_EXAMPLES and " + "UMF_BUILD_LEVEL_ZERO_PROVIDER to be turned ON - skipping") endif() if(UMF_POOL_SCALABLE_ENABLED) diff --git a/test/c_api/disjoint_pool.c b/test/c_api/disjoint_pool.c index 4d4634def..d38827808 100644 --- a/test/c_api/disjoint_pool.c +++ b/test/c_api/disjoint_pool.c @@ -4,7 +4,8 @@ #include -#include "pool_disjoint.h" +#include + #include "provider_null.h" #include "test_helpers.h" #include "test_ut_asserts.h" diff --git a/test/pools/disjoint_pool.cpp b/test/pools/disjoint_pool.cpp index c254400db..eaa8fc3ab 100644 --- a/test/pools/disjoint_pool.cpp +++ b/test/pools/disjoint_pool.cpp @@ -4,9 +4,11 @@ #include +#include + #include "pool.hpp" +#include "pool/pool_disjoint_internal.h" #include "poolFixtures.hpp" -#include "pool_disjoint.h" #include "provider.hpp" #include "provider_null.h" #include "provider_trace.h" @@ -57,6 +59,100 @@ disjoint_params_unique_handle_t poolConfig() { using umf_test::test; using namespace umf_test; +TEST_F(test, internals) { + static umf_result_t expectedResult = UMF_RESULT_SUCCESS; + struct memory_provider : public umf_test::provider_base_t { + umf_result_t alloc(size_t size, size_t, void **ptr) noexcept { + *ptr = malloc(size); + return UMF_RESULT_SUCCESS; + } + + umf_result_t free(void *ptr, [[maybe_unused]] size_t size) noexcept { + // do the actual free only when we expect the success + if (expectedResult == UMF_RESULT_SUCCESS) { + ::free(ptr); + } + return expectedResult; + } + + umf_result_t + get_min_page_size([[maybe_unused]] void *ptr, + [[maybe_unused]] size_t *pageSize) noexcept { + *pageSize = 1024; + return UMF_RESULT_SUCCESS; + } + }; + umf_memory_provider_ops_t provider_ops = + umf::providerMakeCOps(); + + auto providerUnique = + wrapProviderUnique(createProviderChecked(&provider_ops, nullptr)); + + umf_memory_provider_handle_t provider_handle; + provider_handle = providerUnique.get(); + + umf_disjoint_pool_params_t params = *poolConfig(); + // set to maximum tracing + params.pool_trace = 3; + + // in "internals" test we use ops interface to directly manipulate the pool + // structure + umf_memory_pool_ops_t *ops = umfDisjointPoolOps(); + EXPECT_NE(ops, nullptr); + + disjoint_pool_t *pool; + umf_result_t res = + ops->initialize(provider_handle, ¶ms, (void **)&pool); + EXPECT_EQ(res, UMF_RESULT_SUCCESS); + EXPECT_NE(pool, nullptr); + EXPECT_EQ(pool->provider_min_page_size, 1024); + + // test small allocations + size_t size = 8; + void *ptr = ops->malloc(pool, size); + EXPECT_NE(ptr, nullptr); + + // get bucket - because of small size this should be the first bucket in + // the pool + bucket_t *bucket = pool->buckets[0]; + EXPECT_NE(bucket, nullptr); + + // check bucket stats + EXPECT_EQ(bucket->alloc_count, 1); + EXPECT_EQ(bucket->alloc_pool_count, 1); + EXPECT_EQ(bucket->curr_slabs_in_use, 1); + + // check slab - there should be only single slab allocated + EXPECT_NE(bucket->available_slabs, nullptr); + EXPECT_EQ(bucket->available_slabs_num, 1); + EXPECT_EQ(bucket->available_slabs->next, nullptr); + slab_t *slab = bucket->available_slabs->val; + + // check slab stats + EXPECT_GE(slab->slab_size, params.slab_min_size); + EXPECT_GE(slab->num_chunks, slab->slab_size / bucket->size); + + // check allocation in slab + EXPECT_EQ(slab->chunks[0], true); + EXPECT_EQ(slab->chunks[1], false); + EXPECT_EQ(slab->first_free_chunk_idx, 1); + + // TODO: + // * multiple alloc + free from single 
bucket + // * alignments + // * full slab alloc + // * slab overflow + // * chunked slabs + // * multiple alloc + free from different buckets + // * alloc something outside pool (> MaxPoolableSize) + // * test capacity + // * check minBucketSize + // * test large objects + + // cleanup + ops->finalize(pool); +} + TEST_F(test, freeErrorPropagation) { static umf_result_t expectedResult = UMF_RESULT_SUCCESS; struct memory_provider : public umf_test::provider_base_t { diff --git a/test/provider_os_memory.cpp b/test/provider_os_memory.cpp index 4c81b84f9..39224941b 100644 --- a/test/provider_os_memory.cpp +++ b/test/provider_os_memory.cpp @@ -9,10 +9,8 @@ #include "test_helpers.h" #include -#include -#if (defined UMF_POOL_DISJOINT_ENABLED) #include -#endif +#include #ifdef UMF_POOL_JEMALLOC_ENABLED #include #endif @@ -427,8 +425,6 @@ os_params_unique_handle_t osMemoryProviderParamsShared() { auto os_params = osMemoryProviderParamsShared(); HostMemoryAccessor hostAccessor; - -#if (defined UMF_POOL_DISJOINT_ENABLED) using disjoint_params_unique_handle_t = std::unique_ptr; @@ -464,13 +460,10 @@ disjoint_params_unique_handle_t disjointPoolParams() { &umfDisjointPoolParamsDestroy); } disjoint_params_unique_handle_t disjointParams = disjointPoolParams(); -#endif static std::vector ipcTestParamsList = { -#if (defined UMF_POOL_DISJOINT_ENABLED) {umfDisjointPoolOps(), disjointParams.get(), umfOsMemoryProviderOps(), os_params.get(), &hostAccessor}, -#endif #ifdef UMF_POOL_JEMALLOC_ENABLED {umfJemallocPoolOps(), nullptr, umfOsMemoryProviderOps(), os_params.get(), &hostAccessor}, diff --git a/test/test_installation.py b/test/test_installation.py index b5dd676dc..8b53abf52 100644 --- a/test/test_installation.py +++ b/test/test_installation.py @@ -278,11 +278,6 @@ def parse_arguments(self) -> argparse.Namespace: action="store_true", help="Add this argument if the proxy library should be built together with the UMF library", ) - self.parser.add_argument( - "--disjoint-pool", - action="store_true", - help="Add this argument if the UMF was built with Disjoint Pool enabled", - ) self.parser.add_argument( "--umf-version", action="store", @@ -299,8 +294,6 @@ def run(self) -> None: build_dir = Path(workspace_dir, self.args.build_dir) install_dir = Path(workspace_dir, self.args.install_dir) pools = [] - if self.args.disjoint_pool: - pools.append("disjoint_pool") umf_version = Version(self.args.umf_version)
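
Usage sketch for the reworked C interface: with the disjoint pool now built into libumf as plain C, a caller configures it only through the umfDisjointPoolParams* setters added above and plugs the ops table returned by umfDisjointPoolOps() into the generic pool API. The include paths, the umfPoolCreate()/umfPoolDestroy() signatures and the assumption that the pool copies the parameters during initialize() (as the removed C++ implementation did) are taken from the existing UMF public API rather than from this patch, so treat the snippet below as an illustrative sketch, not part of the change itself.

#include <umf/memory_pool.h>
#include <umf/pools/pool_disjoint.h>

// Create a disjoint pool on top of an already-created provider handle.
// Assumed entry point: umfPoolCreate(ops, provider, params, flags, out).
static umf_result_t
create_example_disjoint_pool(umf_memory_provider_handle_t provider,
                             umf_memory_pool_handle_t *out) {
    umf_disjoint_pool_params_handle_t params = NULL;
    umf_result_t ret = umfDisjointPoolParamsCreate(&params);
    if (ret != UMF_RESULT_SUCCESS) {
        return ret;
    }

    // Each setter validates the handle and returns
    // UMF_RESULT_ERROR_INVALID_ARGUMENT on NULL; checks omitted for brevity.
    umfDisjointPoolParamsSetSlabMinSize(params, 64 * 1024);
    umfDisjointPoolParamsSetMaxPoolableSize(params, 2 * 1024 * 1024);
    umfDisjointPoolParamsSetCapacity(params, 4);
    umfDisjointPoolParamsSetMinBucketSize(params, 64); // must be a power of 2
    umfDisjointPoolParamsSetName(params, "example_disjoint_pool");

    // umfDisjointPoolOps() now returns the C ops table defined in
    // pool_disjoint.c instead of the former C++ wrapper.
    ret = umfPoolCreate(umfDisjointPoolOps(), provider, params, 0, out);

    // Assuming the pool copies what it needs in initialize(), the params
    // handle can be destroyed right after creation, success or not.
    umfDisjointPoolParamsDestroy(params);
    return ret;
}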