From 1fc048a2a5a32fdd9ef30b3a9c7180514f7a27d8 Mon Sep 17 00:00:00 2001 From: Mathieu Guillame-Bert Date: Wed, 8 May 2024 01:21:37 -0700 Subject: [PATCH] Release YDF 0.4.3 and TF-DF 1.9.1 PiperOrigin-RevId: 631693889 --- .../port/python/CHANGELOG.md | 4 +- .../port/python/config/setup.py | 3 +- .../port/python/dev_requirements.txt | 5 +- .../port/python/examples/minimal.py | 6 ++- .../tools/build_linux_aarch64_release.sh | 2 +- .../port/python/tools/build_linux_release.sh | 4 +- .../port/python/tools/build_macos_release.sh | 2 +- .../python/tools/build_windows_release.bat | 3 +- .../port/python/ydf/BUILD | 46 +++++++++++++++++++ .../port/python/ydf/api_test.py | 5 ++ .../port/python/ydf/learner/learner_test.py | 6 +-- .../port/python/ydf/metric/metric_test.py | 6 +-- .../port/python/ydf/version.py | 2 +- .../tools/run_e2e_pydf_test.sh | 24 ++++++---- 14 files changed, 91 insertions(+), 27 deletions(-) diff --git a/yggdrasil_decision_forests/port/python/CHANGELOG.md b/yggdrasil_decision_forests/port/python/CHANGELOG.md index 2f1bfe18..3bb912f3 100644 --- a/yggdrasil_decision_forests/port/python/CHANGELOG.md +++ b/yggdrasil_decision_forests/port/python/CHANGELOG.md @@ -1,17 +1,19 @@ # Changelog -## HEAD +## 0.4.3- 2024-05-07 ### Feature - Add `model.to_jax_function()` function to convert a YDF model into a JAX function that can be combined with other JAX operations. - Print warnings when categorical features look like numbers. +- Add support for Python 3.12. ### Fix - Fix cross-validation for non-classification learners. - Fix missing ydf/model/tree/plotter.js +- Solve dependency collision of YDF Proto between PYDF and TF-DF. 
## 0.4.2- 2024-04-22 diff --git a/yggdrasil_decision_forests/port/python/config/setup.py b/yggdrasil_decision_forests/port/python/config/setup.py index 285c5ced..8727d908 100644 --- a/yggdrasil_decision_forests/port/python/config/setup.py +++ b/yggdrasil_decision_forests/port/python/config/setup.py @@ -21,7 +21,7 @@ from setuptools.command.install import install from setuptools.dist import Distribution -_VERSION = "0.4.2" +_VERSION = "0.4.3" with open("README.md", "r", encoding="utf-8") as fh: long_description = fh.read() @@ -111,6 +111,7 @@ def get_tag(self): "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3 :: Only", "Topic :: Scientific/Engineering", "Topic :: Scientific/Engineering :: Mathematics", diff --git a/yggdrasil_decision_forests/port/python/dev_requirements.txt b/yggdrasil_decision_forests/port/python/dev_requirements.txt index cde38f9a..86a58190 100644 --- a/yggdrasil_decision_forests/port/python/dev_requirements.txt +++ b/yggdrasil_decision_forests/port/python/dev_requirements.txt @@ -1,8 +1,9 @@ pandas -tensorflow_decision_forests; platform_machine != 'aarch64' +tensorflow_decision_forests; platform_machine != 'aarch64' and python_version >= '3.9' and python_version < '3.12' tensorflow; platform_machine != 'aarch64' portpicker matplotlib jax; platform_machine != 'aarch64' and platform_system != 'Windows' jaxlib; platform_machine != 'aarch64' and platform_system != 'Windows' -optax; platform_machine != 'aarch64' and platform_system != 'Windows' and python_version >= '3.9' \ No newline at end of file +optax; platform_machine != 'aarch64' and platform_system != 'Windows' and python_version >= '3.9' +flatbuffers; platform_machine != 'aarch64' and platform_system != 'Windows' and python_version >= '3.12' \ No newline at end of file diff --git 
a/yggdrasil_decision_forests/port/python/examples/minimal.py b/yggdrasil_decision_forests/port/python/examples/minimal.py index 0a46adc7..2de3b487 100644 --- a/yggdrasil_decision_forests/port/python/examples/minimal.py +++ b/yggdrasil_decision_forests/port/python/examples/minimal.py @@ -23,6 +23,7 @@ python minimal.py """ +import sys from absl import app import pandas as pd import ydf @@ -61,7 +62,10 @@ def main(argv): predictions = loaded_model.predict(test_df) print(predictions) - loaded_model.to_tensorflow_saved_model("/tmp/tf_saved_model") + if not (sys.version_info < (3, 9)) and (sys.version_info < (3, 12)): + # TensorFlow is not supported anymore for py3.8. + # TensorFlow Decision Forests is not yet supported for py3.12. + loaded_model.to_tensorflow_saved_model("/tmp/tf_saved_model") if __name__ == "__main__": diff --git a/yggdrasil_decision_forests/port/python/tools/build_linux_aarch64_release.sh b/yggdrasil_decision_forests/port/python/tools/build_linux_aarch64_release.sh index 4df0f874..5e4e58e4 100755 --- a/yggdrasil_decision_forests/port/python/tools/build_linux_aarch64_release.sh +++ b/yggdrasil_decision_forests/port/python/tools/build_linux_aarch64_release.sh @@ -16,7 +16,7 @@ # Builds all python versions for release on Pypi -PYTHON_VERSIONS=( 3.8 3.9 3.10 3.11 ) +PYTHON_VERSIONS=( 3.8 3.9 3.10 3.11 3.12 ) function build_py() { local PYTHON="python"$1 diff --git a/yggdrasil_decision_forests/port/python/tools/build_linux_release.sh b/yggdrasil_decision_forests/port/python/tools/build_linux_release.sh index 93bebfd4..06e8e56c 100755 --- a/yggdrasil_decision_forests/port/python/tools/build_linux_release.sh +++ b/yggdrasil_decision_forests/port/python/tools/build_linux_release.sh @@ -16,11 +16,11 @@ # Builds all python versions for release on Pypi -PYTHON_VERSIONS=( 3.8 3.9 3.10 3.11 ) +PYTHON_VERSIONS=( 3.8 3.9 3.10 3.11 3.12 ) function build_py() { local PYTHON="python"$1 - echo "Starting build with " $PYTHON + echo "Starting build with $PYTHON" $PYTHON 
-m venv /tmp/venv_$PYTHON source /tmp/venv_$PYTHON/bin/activate bazel clean --expunge diff --git a/yggdrasil_decision_forests/port/python/tools/build_macos_release.sh b/yggdrasil_decision_forests/port/python/tools/build_macos_release.sh index 24101252..e259c3bf 100755 --- a/yggdrasil_decision_forests/port/python/tools/build_macos_release.sh +++ b/yggdrasil_decision_forests/port/python/tools/build_macos_release.sh @@ -16,7 +16,7 @@ set -vex -declare -a python_versions=("3.8" "3.9" "3.10" "3.11") +declare -a python_versions=("3.8" "3.9" "3.10" "3.11" "3.12") for pyver in "${python_versions[@]}" do diff --git a/yggdrasil_decision_forests/port/python/tools/build_windows_release.bat b/yggdrasil_decision_forests/port/python/tools/build_windows_release.bat index 38339150..816f382c 100644 --- a/yggdrasil_decision_forests/port/python/tools/build_windows_release.bat +++ b/yggdrasil_decision_forests/port/python/tools/build_windows_release.bat @@ -34,7 +34,7 @@ cls setlocal -set YDF_VERSION=0.4.2 +set YDF_VERSION=0.4.3 set BAZEL=bazel.exe set BAZEL_SH=C:\msys64\usr\bin\bash.exe set BAZEL_FLAGS=--config=windows_cpp20 --config=windows_avx2 @@ -45,6 +45,7 @@ CALL :End2End 38 || goto :error CALL :End2End 39 || goto :error CALL :End2End 310 || goto :error CALL :End2End 311 || goto :error +CALL :End2End 312 || goto :error :: In case of error goto :EOF diff --git a/yggdrasil_decision_forests/port/python/ydf/BUILD b/yggdrasil_decision_forests/port/python/ydf/BUILD index d13b73c6..aea54afe 100644 --- a/yggdrasil_decision_forests/port/python/ydf/BUILD +++ b/yggdrasil_decision_forests/port/python/ydf/BUILD @@ -13,6 +13,7 @@ py_library( visibility = ["//visibility:public"], deps = [ ":version", + ":ydf_protos", "//ydf/dataset", "//ydf/dataset:dataspec", "//ydf/learner:custom_loss_py", @@ -32,6 +33,51 @@ py_library( ], ) +# All the YDF protos, including the ones possibly not used by PYDF. 
+py_library( + name = "ydf_protos", + deps = [ + "@ydf_cc//yggdrasil_decision_forests/dataset:data_spec_py_proto", + "@ydf_cc//yggdrasil_decision_forests/dataset:example_py_proto", + "@ydf_cc//yggdrasil_decision_forests/dataset:formats_py_proto", + "@ydf_cc//yggdrasil_decision_forests/dataset:synthetic_dataset_py_proto", + "@ydf_cc//yggdrasil_decision_forests/dataset:weight_py_proto", + "@ydf_cc//yggdrasil_decision_forests/learner:abstract_learner_py_proto", + "@ydf_cc//yggdrasil_decision_forests/learner/cart:cart_py_proto", + "@ydf_cc//yggdrasil_decision_forests/learner/decision_tree:decision_tree_py_proto", + "@ydf_cc//yggdrasil_decision_forests/learner/distributed_decision_tree:training_py_proto", + "@ydf_cc//yggdrasil_decision_forests/learner/distributed_decision_tree/dataset_cache:dataset_cache_py_proto", + "@ydf_cc//yggdrasil_decision_forests/learner/distributed_decision_tree/load_balancer:load_balancer_py_proto", + "@ydf_cc//yggdrasil_decision_forests/learner/distributed_gradient_boosted_trees:dgbt_py_proto", + "@ydf_cc//yggdrasil_decision_forests/learner/distributed_gradient_boosted_trees:worker_py_proto", + "@ydf_cc//yggdrasil_decision_forests/learner/generic_worker:generic_worker_py_proto", + "@ydf_cc//yggdrasil_decision_forests/learner/gradient_boosted_trees:gradient_boosted_trees_py_proto", + "@ydf_cc//yggdrasil_decision_forests/learner/gradient_boosted_trees/early_stopping:early_stopping_snapshot_py_proto", + "@ydf_cc//yggdrasil_decision_forests/learner/hyperparameters_optimizer:hyperparameters_optimizer_py_proto", + "@ydf_cc//yggdrasil_decision_forests/learner/hyperparameters_optimizer/optimizers:random_py_proto", + "@ydf_cc//yggdrasil_decision_forests/learner/multitasker:multitasker_py_proto", + "@ydf_cc//yggdrasil_decision_forests/learner/random_forest:random_forest_py_proto", + "@ydf_cc//yggdrasil_decision_forests/metric:metric_py_proto", + "@ydf_cc//yggdrasil_decision_forests/model:abstract_model_py_proto", + 
"@ydf_cc//yggdrasil_decision_forests/model:hyperparameter_py_proto", + "@ydf_cc//yggdrasil_decision_forests/model:prediction_py_proto", + "@ydf_cc//yggdrasil_decision_forests/model/decision_tree:decision_tree_py_proto", + "@ydf_cc//yggdrasil_decision_forests/model/gradient_boosted_trees:gradient_boosted_trees_py_proto", + "@ydf_cc//yggdrasil_decision_forests/model/multitasker:multitasker_py_proto", + "@ydf_cc//yggdrasil_decision_forests/model/random_forest:random_forest_py_proto", + "@ydf_cc//yggdrasil_decision_forests/serving:serving_py_proto", + "@ydf_cc//yggdrasil_decision_forests/utils:bitmap_py_proto", + "@ydf_cc//yggdrasil_decision_forests/utils:distribution_py_proto", + "@ydf_cc//yggdrasil_decision_forests/utils:fold_generator_py_proto", + "@ydf_cc//yggdrasil_decision_forests/utils:model_analysis_py_proto", + "@ydf_cc//yggdrasil_decision_forests/utils:partial_dependence_plot_py_proto", + "@ydf_cc//yggdrasil_decision_forests/utils/distribute:distribute_py_proto", + "@ydf_cc//yggdrasil_decision_forests/utils/distribute/implementations/grpc:grpc_py_proto", + "@ydf_cc//yggdrasil_decision_forests/utils/distribute/implementations/multi_thread:multi_thread_py_proto", + "@ydf_cc//yggdrasil_decision_forests/utils/distribute_cli:distribute_cli_py_proto", + ], +) + py_library( name = "version", srcs = ["version.py"], diff --git a/yggdrasil_decision_forests/port/python/ydf/api_test.py b/yggdrasil_decision_forests/port/python/ydf/api_test.py index aac541ec..30deab15 100644 --- a/yggdrasil_decision_forests/port/python/ydf/api_test.py +++ b/yggdrasil_decision_forests/port/python/ydf/api_test.py @@ -198,6 +198,11 @@ def test_export_tensorflow_saved_model(self): "TFDF is not supported anymore on python <= 3.8. Skipping TFDF tests." ) return + if not sys.version_info < (3, 12): + print( + "TFDF is not yet supported for python >= 3.12. Skipping TFDF tests." 
+ ) + return model_path = os.path.join( test_utils.ydf_test_data_path(), "model", "adult_binary_class_rf" diff --git a/yggdrasil_decision_forests/port/python/ydf/learner/learner_test.py b/yggdrasil_decision_forests/port/python/ydf/learner/learner_test.py index ed9a7375..fd25a94e 100644 --- a/yggdrasil_decision_forests/port/python/ydf/learner/learner_test.py +++ b/yggdrasil_decision_forests/port/python/ydf/learner/learner_test.py @@ -496,15 +496,15 @@ def test_compare_pandas_and_path(self): def test_default_hp_dictionary(self): learner = specialized_learners.RandomForestLearner(label="l", num_trees=50) - self.assertDictContainsSubset( + self.assertLessEqual( { "num_trees": 50, "categorical_algorithm": "CART", "categorical_set_split_greedy_sampling": 0.1, "compute_oob_performances": True, "compute_oob_variable_importances": False, - }, - learner.hyperparameters, + }.items(), + learner.hyperparameters.items(), ) def test_multidimensional_training_dataset(self): diff --git a/yggdrasil_decision_forests/port/python/ydf/metric/metric_test.py b/yggdrasil_decision_forests/port/python/ydf/metric/metric_test.py index 5e235ddb..9fd72a51 100644 --- a/yggdrasil_decision_forests/port/python/ydf/metric/metric_test.py +++ b/yggdrasil_decision_forests/port/python/ydf/metric/metric_test.py @@ -150,9 +150,9 @@ def test_classification(self): evaluation = metric.Evaluation(proto_eval) print(evaluation) dict_eval = evaluation.to_dict() - self.assertDictContainsSubset( - {"accuracy": (1 + 4) / (1 + 2 + 3 + 4), "loss": 2.0, "num_examples": 1}, - dict_eval, + self.assertLessEqual( + {"accuracy": (1 + 4) / (1 + 2 + 3 + 4), "loss": 2.0, "num_examples": 1}.items(), + dict_eval.items(), ) self.assertEqual(dict_eval["confusion_matrix"].classes, ("1", "2")) diff --git a/yggdrasil_decision_forests/port/python/ydf/version.py b/yggdrasil_decision_forests/port/python/ydf/version.py index acf316fa..d066b205 100644 --- a/yggdrasil_decision_forests/port/python/ydf/version.py +++ 
b/yggdrasil_decision_forests/port/python/ydf/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -version = "0.4.2" +version = "0.4.3" diff --git a/yggdrasil_decision_forests/tools/run_e2e_pydf_test.sh b/yggdrasil_decision_forests/tools/run_e2e_pydf_test.sh index ac28a2ae..7feafccc 100755 --- a/yggdrasil_decision_forests/tools/run_e2e_pydf_test.sh +++ b/yggdrasil_decision_forests/tools/run_e2e_pydf_test.sh @@ -20,7 +20,7 @@ # Usage example: # third_party/yggdrasil_decision_forests/tools/run_e2e_pydf_test.sh -set -ex +set -vex LOCAL_DIR="/usr/local/google/home/${USER}/git/yggdrasil-decision-forests" CL=$(hg exportedcl) @@ -57,22 +57,26 @@ run_test() { sudo docker start ${DOCKER_CONTAINER} set -e - PREPARE='yum update;yum install -y rsync;curl -L -o /usr/local/bin/bazel https://github.com/bazelbuild/bazelisk/releases/download/v1.19.0/bazelisk-linux-amd64;chmod +x /usr/local/bin/bazel;PYTHON=python3.11;$PYTHON -m venv /tmp/venv_$PYTHON;source /tmp/venv_$PYTHON/bin/activate;export COMPILERS="gcc"' + # Install the build dependencies + CMD="yum update;yum install -y rsync;curl -L -o /usr/local/bin/bazel https://github.com/bazelbuild/bazelisk/releases/download/v1.19.0/bazelisk-linux-amd64;chmod +x /usr/local/bin/bazel;export COMPILERS=gcc" - # Only the shell - CMD='$SHELL' - # Compile PYDF and give a shell - # CMD='./tools/test_pydf.sh;./tools/build_pydf.sh python;$SHELL' + # Only start a shell + CMD="${CMD};/bin/bash" + + # Or, compile PYDF and give a shell + # CMD="${CMD};PYTHON=python3.11;\$PYTHON -m venv /tmp/venv_\$PYTHON;source /tmp/venv_\$PYTHON/bin/activate" + # CMD="${CMD};./tools/test_pydf.sh;./tools/build_pydf.sh python;/bin/bash" # In the shell, you can: # - # If the test fails, you can restart it with: + # Build and run the unit tests: + # PYTHON=python3.9;$PYTHON -m venv /tmp/venv_$PYTHON;source /tmp/venv_$PYTHON/bin/activate # ./tools/test_pydf.sh # - # To build a single pip
package, run: + # To build a pip package from the artefacts of the previous command: # ./tools/build_pydf.sh python # - # To create the full PYDF release, with all the versions, run: + # Compile, test, and build the pip packages for all the python versions: # ./tools/build_linux_release.sh # # To start a notebook instance, run: @@ -83,7 +87,7 @@ run_test() { # sudo sudo docker stop [ID] # sudo docker system prune -a - sudo docker exec -it ${DOCKER_CONTAINER} /bin/bash -c "${PREPARE};${CMD}" + sudo docker exec -it ${DOCKER_CONTAINER} /bin/bash -c "${CMD}" } run_export