Skip to content

Commit

Permalink
DF 0.4.1 release.
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 626343734
  • Loading branch information
achoum authored and copybara-github committed Apr 19, 2024
1 parent c82f647 commit 66f1641
Show file tree
Hide file tree
Showing 9 changed files with 79 additions and 144 deletions.
9 changes: 9 additions & 0 deletions yggdrasil_decision_forests/port/python/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,14 @@
# Changelog

## 0.4.1 - 2024-04-18

### Fix

- Solve a dependency collision on YDF between PYDF and TF-DF. Previously, if
TF-DF was installed after PYDF, importing YDF failed with a `has no attribute
'DType'` error.
- Allow training on cached TensorFlow datasets.

## 0.4.0 - 2024-04-10

### Feature
Expand Down
2 changes: 1 addition & 1 deletion yggdrasil_decision_forests/port/python/config/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from setuptools.command.install import install
from setuptools.dist import Distribution

_VERSION = "0.4.0"
_VERSION = "0.4.1"

with open("README.md", "r", encoding="utf-8") as fh:
long_description = fh.read()
Expand Down
36 changes: 32 additions & 4 deletions yggdrasil_decision_forests/port/python/tools/assembly_pip_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,12 +42,35 @@ def rec_glob_copy(src_dir: str, dst_dir: str, pattern: str):
s.copy(f"{src_dir}/{frel}", dst)


def replace_in_files(src_dir, extension, old_string, new_string):
  """Replaces a string in all files with a given extension within a directory.

  The directory tree rooted at `src_dir` is walked recursively. Files are read
  and written as UTF-8 so that the result does not depend on the platform's
  default encoding (e.g. cp1252 on Windows). Files that do not contain
  `old_string` are left untouched instead of being rewritten.

  Args:
    src_dir: Root directory to search.
    extension: Filename suffix selecting the files to process, e.g. ".py".
    old_string: Text to search for.
    new_string: Text substituted for every occurrence of `old_string`.
  """

  for root, _, filenames in os.walk(src_dir):
    for filename in filenames:
      if not filename.endswith(extension):
        continue
      filepath = os.path.join(root, filename)

      # Read file content
      with open(filepath, "r", encoding="utf-8") as f:
        file_content = f.read()

      # Replace the string
      new_content = file_content.replace(old_string, new_string)
      if new_content == file_content:
        # Nothing to replace: avoid a needless rewrite of the file.
        continue

      # Overwrite the file with the modified content
      with open(filepath, "w", encoding="utf-8") as f:
        f.write(new_content)


# Remove and recreate the package directory
if os.path.exists(DST_PK):
try:
s.rmtree(DST_PK)
except Exception:
print("Fail to remove the existing dir with rmtree. Use rmdir instead.")
print(
"Fail to remove the existing dir with rmtree. Use rmdir instead (only"
" for Windows)."
)
os.system(f"rmdir /S /Q {DST_PK}")
os.makedirs(DST_PK)

Expand All @@ -69,10 +92,10 @@ def rec_glob_copy(src_dir: str, dst_dir: str, pattern: str):
os.makedirs(f"{DST_PK}/ydf/learner")
s.copy(f"{SRC_BIN}/learner/specialized_learners.py", f"{DST_PK}/ydf/learner")

# The YDF protos
# Copy the YDF c++ protos
rec_glob_copy(
"bazel-bin/external/ydf_cc/yggdrasil_decision_forests",
f"{DST_PK}/yggdrasil_decision_forests",
f"{DST_PK}/ydf/proto",
"**/*.py",
)

Expand All @@ -81,6 +104,11 @@ def rec_glob_copy(src_dir: str, dst_dir: str, pattern: str):

# Create the missing __init__.py files
INIT_FILENAME = "__init__.py"
for path, _, files in os.walk(f"{DST_PK}/yggdrasil_decision_forests"):
for path, _, files in os.walk(f"{DST_PK}/ydf/proto"):
if INIT_FILENAME not in files:
Path(f"{path}/{INIT_FILENAME}").touch()

# Change path to YDF proto files
replace_in_files(
DST_PK, ".py", "from yggdrasil_decision_forests.", "from ydf.proto."
)
130 changes: 3 additions & 127 deletions yggdrasil_decision_forests/port/python/tools/build_pydf.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,17 +22,6 @@
# Usage example:
# # Generate the pip package with python3.9
# ./tools/build_pydf.sh python3.9
#
# # Generate the pip package for all the versions of python using pyenv.
# # Make sure the package are compatible with manylinux2014.
# ./tools/build_pip_package.sh ALL_VERSIONS
#
# Requirements:
#
# pyenv (if using ALL_VERSIONS_ALREADY_ASSEMBLED or ALL_VERSIONS)
# See https://github.com/pyenv/pyenv-installer
# Will be installed by this script if INSTALL_PYENV is set to INSTALL_PYENV.
#

set -xve

Expand Down Expand Up @@ -78,33 +67,7 @@ function check_is_build() {
function assemble_files() {
check_is_build

rm -fr ${SRCPK}
mkdir -p ${SRCPK}
cp -R ydf config/setup.py config/MANIFEST.in README.md CHANGELOG.md ${SRCPK}

# When cross-compiling, adapt setup.py
if [ ${ARG} == "ALL_VERSIONS_MAC_CROSSCOMPILE" ]; then
sed -i'.bak' -e "s/MAC_CROSS_COMPILED = False/MAC_CROSS_COMPILED = True/" ${SRCPK}/setup.py
fi

# YDF's wrappers and .so.
SRCBIN="bazel-bin/ydf"
cp ${SRCBIN}/cc/ydf.so ${SRCPK}/ydf/cc/

cp ${SRCBIN}/learner/specialized_learners.py ${SRCPK}/ydf/learner/

# YDF's proto wrappers.
YDFSRCBIN="bazel-bin/external/ydf_cc/yggdrasil_decision_forests"
mkdir -p ${SRCPK}/yggdrasil_decision_forests
pushd ${YDFSRCBIN}
find . -name \*.py -exec rsync -R -arv {} ${SRCPK}/yggdrasil_decision_forests \;
popd

# Copy the license file from YDF
cp bazel-python/external/ydf_cc/LICENSE ${SRCPK}

# Add __init__.py to all exported Yggdrasil sub-directories.
find ${SRCPK}/yggdrasil_decision_forests -type d -exec touch {}/__init__.py \;
${PYTHON} tools/assembly_pip_files.py
}

# Build a pip package.
Expand Down Expand Up @@ -194,92 +157,5 @@ function e2e_native() {
test_package ${PYTHON} ${PACKAGE}
}

# Builds and tests a pip package in Pyenv.
function e2e_pyenv() {
VERSION="$1"
shift

# Don't force updating pyenv, we use a fixed version.
# pyenv update

ENVNAME=env_${VERSION}
pyenv install ${VERSION} -s

# Enable pyenv virtual environment.
set +e
pyenv virtualenv ${VERSION} ${ENVNAME}
set -e
pyenv activate ${ENVNAME}

e2e_native python3

# Disable virtual environment.
pyenv deactivate
}

ARG="$1"
INSTALL_PYENV="$2"
shift | true

if [ ${INSTALL_PYENV} == "INSTALL_PYENV" ]; then
if ! [ -x "$(command -v pyenv)" ]; then
echo "Pyenv not found."
echo "Installing build deps, pyenv 2.3.7 and pyenv virtualenv 1.2.1"
# Install python dependencies.
if ! is_macos; then
sudo apt-get update
sudo apt-get install -qq make build-essential libssl-dev zlib1g-dev \
libbz2-dev libreadline-dev libsqlite3-dev wget curl llvm \
libncursesw5-dev xz-utils tk-dev libxml2-dev libxmlsec1-dev \
libffi-dev liblzma-dev patchelf
fi
git clone https://github.com/pyenv/pyenv.git
(
cd pyenv && git checkout 74f923b5fca82054b3c579f9eb936338c7f5a394
)
PYENV_ROOT="$(pwd)/pyenv"
export PATH="$PYENV_ROOT/bin:$PATH"
eval "$(pyenv init --path)"
eval "$(pyenv init -)"
git clone https://github.com/pyenv/pyenv-virtualenv.git $(pyenv root)/plugins/pyenv-virtualenv
(
cd $(pyenv root)/plugins/pyenv-virtualenv && git checkout 13bc1877ef06ed038c65dcab4e901da6ea6c67ae
)
eval "$(pyenv init --path)"
eval "$(pyenv init -)"
eval "$(pyenv virtualenv-init -)"
fi
fi

if [ -z "${ARG}" ]; then
echo "The first argument should be one of:"
echo " ALL_VERSIONS: Build all pip packages using pyenv."
echo " ALL_VERSIONS_ALREADY_ASSEMBLED: Build all pip packages from already assembled files using pyenv."
echo " ALL_VERSIONS_MAC_CROSSCOMPILE: Build all pip packages from already assembled files using pyenv and cross-compile between MacOS ARM64 / Intel builds."
echo " Python binary (e.g. python3.9): Build a pip package for a specific python version without pyenv."
exit 1
elif [ ${ARG} == "ALL_VERSIONS" ]; then
# Compile with all the version of python using pyenv.
assemble_files
eval "$(pyenv init -)"
e2e_pyenv 3.9.12
e2e_pyenv 3.10.4
e2e_pyenv 3.11.0
elif [ ${ARG} == "ALL_VERSIONS_ALREADY_ASSEMBLED" ]; then
eval "$(pyenv init -)"
e2e_pyenv 3.9.12
e2e_pyenv 3.10.4
e2e_pyenv 3.11.0
elif [ ${ARG} == "ALL_VERSIONS_MAC_CROSSCOMPILE" ]; then
eval "$(pyenv init -)"
assemble_files
e2e_pyenv 3.9.12
e2e_pyenv 3.10.4
e2e_pyenv 3.11.0
else
# Compile with a specific version of python provided in the call arguments.
assemble_files
PYTHON=${ARG}
e2e_native ${PYTHON}
fi

PYTHON="$1"
assemble_files ${PYTHON}
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
cls
setlocal

set YDF_VERSION=0.4.0
set YDF_VERSION=0.4.1
set BAZEL=bazel.exe
set BAZEL_SH=C:\msys64\usr\bin\bash.exe
set BAZEL_FLAGS=--config=windows_cpp20 --config=windows_avx2
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,22 +14,36 @@

"""Connectors for loading data from Pandas dataframes."""

import logging
import sys
from typing import Dict

from ydf.dataset.io import dataset_io_types


def is_tensorflow_dataset(data: dataset_io_types.IODataset) -> bool:
# Note: We only test if the dataset is a TensorFlow dataset if the object name
# looks like a TensorFlow object. This way, we avoid importing TF if it is not
# necessary.
return (
"tensorflow" in str(type(data))
and data.__class__.__name__
in ("_BatchDataset", "_MapDataset", "DatasetV1Adapter")
and hasattr(data, "rebatch")
)
str_class = str(type(data))
if "tensorflow" in str_class and hasattr(data, "rebatch"):

if data.__class__.__name__ in (
"_BatchDataset",
"_MapDataset",
"DatasetV1Adapter",
"CacheDataset",
):
return True

if "data.ops" in str_class:
logging.warning(
"The dataset %s object is not listed as a YDF compatible TensorFlow"
" Dataset, but it looks like one",
str_class,
)
return True

return False


def to_dict(
Expand Down
1 change: 1 addition & 0 deletions yggdrasil_decision_forests/port/python/ydf/learner/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,7 @@ py_test(
":generic_learner",
":specialized_learners",
# absl/testing:absltest dep,
# absl/testing:parameterized dep,
# pandas dep,
# tensorflow:tensorflow_no_contrib dep,
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@

"""Tests for model learning."""


from absl.testing import absltest
from absl.testing import parameterized
import pandas as pd
import tensorflow as tf

Expand All @@ -34,9 +34,12 @@ def toy_dataset():
return df


class RandomForestLearnerTest(absltest.TestCase):
class RandomForestLearnerTest(parameterized.TestCase):

def test_tensorflow_dataset(self):
@parameterized.parameters({"use_cache": True}, {"use_filter": True})
def test_tensorflow_dataset(
self, use_cache: bool = False, use_filter: bool = False
):
learner = specialized_learners.RandomForestLearner(
label="label", num_trees=1
)
Expand All @@ -45,6 +48,10 @@ def test_tensorflow_dataset(self):
)
for x in tf_dataset.take(2):
print(x)
if use_cache:
tf_dataset = tf_dataset.cache()
if use_filter:
tf_dataset = tf_dataset.filter(lambda x: True)
self.assertEqual(
learner.train(tf_dataset).task(), generic_learner.Task.CLASSIFICATION
)
Expand Down
2 changes: 1 addition & 1 deletion yggdrasil_decision_forests/port/python/ydf/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,4 @@
# See the License for the specific language governing permissions and
# limitations under the License.

version = "0.4.0"
version = "0.4.1"

0 comments on commit 66f1641

Please sign in to comment.