Skip to content

Commit

Permalink
[PYDF] Add export of models to c++
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 574468021
  • Loading branch information
achoum authored and copybara-github committed Oct 18, 2023
1 parent ebf6709 commit 46c961c
Show file tree
Hide file tree
Showing 15 changed files with 557 additions and 7 deletions.
20 changes: 20 additions & 0 deletions yggdrasil_decision_forests/api/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
load("//yggdrasil_decision_forests/utils:compile.bzl", "cc_library_ydf")

# Package-wide defaults: every target declared here is publicly visible.
package(
default_visibility = ["//visibility:public"],
licenses = ["notice"],
)

# Library exposing serving.h. It declares only a header (no srcs) and pulls in
# the model and serving targets that the header includes.
cc_library_ydf(
name = "serving",
hdrs = ["serving.h"],
deps = [
"//yggdrasil_decision_forests/model:abstract_model",
"//yggdrasil_decision_forests/model:all_models",
"//yggdrasil_decision_forests/model:model_library",
"//yggdrasil_decision_forests/serving:example_set",
"//yggdrasil_decision_forests/serving:fast_engine",
"@com_google_absl//absl/status:statusor",
"@com_google_absl//absl/strings",
],
)
57 changes: 57 additions & 0 deletions yggdrasil_decision_forests/api/serving.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
/*
* Copyright 2022 Google LLC.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef YGGDRASIL_DECISION_FORESTS_API_SERVING_H_
#define YGGDRASIL_DECISION_FORESTS_API_SERVING_H_

#include <memory>

#include "absl/status/statusor.h"
#include "absl/strings/string_view.h"
#include "yggdrasil_decision_forests/model/abstract_model.h"
#include "yggdrasil_decision_forests/model/model_library.h"
#include "yggdrasil_decision_forests/serving/example_set.h"
#include "yggdrasil_decision_forests/serving/fast_engine.h"

namespace yggdrasil_decision_forests::serving_api {

// Short aliases re-exporting the model and serving types under the
// serving_api namespace, so that callers do not need to spell the full
// ::yggdrasil_decision_forests::model / ::serving qualified names.
using AbstractModel = ::yggdrasil_decision_forests::model::AbstractModel;
using FastEngine = ::yggdrasil_decision_forests::serving::FastEngine;
using FeaturesDefinition =
::yggdrasil_decision_forests::serving::FeaturesDefinition;

// Feature identifier types, one per feature kind, all taken from
// FeaturesDefinitionNumericalOrCategoricalFlat.
using NumericalFeatureId = ::yggdrasil_decision_forests::serving::
FeaturesDefinitionNumericalOrCategoricalFlat::NumericalFeatureId;
using CategoricalFeatureId = ::yggdrasil_decision_forests::serving::
FeaturesDefinitionNumericalOrCategoricalFlat::CategoricalFeatureId;
using CategoricalSetFeatureId = ::yggdrasil_decision_forests::serving::
FeaturesDefinitionNumericalOrCategoricalFlat::CategoricalSetFeatureId;
using BooleanFeatureId = ::yggdrasil_decision_forests::serving::
FeaturesDefinitionNumericalOrCategoricalFlat::BooleanFeatureId;
using MultiDimNumericalFeatureId = ::yggdrasil_decision_forests::serving::
FeaturesDefinitionNumericalOrCategoricalFlat::MultiDimNumericalFeatureId;

// Reads the model stored in "directory" and returns it as an in-memory
// AbstractModel. The returned model can then be compiled to be run
// efficiently. If model::LoadModel reports an error, that error is returned
// instead of a model.
inline absl::StatusOr<std::unique_ptr<AbstractModel>> LoadModel(
    absl::string_view directory) {
  // Named differently from the "model" namespace to keep the call below easy
  // to read.
  std::unique_ptr<AbstractModel> loaded_model;
  RETURN_IF_ERROR(model::LoadModel(directory, &loaded_model));
  return loaded_model;
}

} // namespace yggdrasil_decision_forests::serving_api

#endif // YGGDRASIL_DECISION_FORESTS_API_SERVING_H_
6 changes: 6 additions & 0 deletions yggdrasil_decision_forests/port/python/ydf/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ py_library(
srcs = ["__init__.py"],
visibility = [":users"],
deps = [
":version",
"//ydf/dataset",
"//ydf/learner:generic_learner",
"//ydf/learner:specialized_learners",
Expand All @@ -38,6 +39,11 @@ py_library(
],
)

# Library wrapping version.py, which holds the package version string.
py_library(
name = "version",
srcs = ["version.py"],
)

# Tests
# =====

Expand Down
5 changes: 3 additions & 2 deletions yggdrasil_decision_forests/port/python/ydf/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,10 @@
# TIP: If you need to import something here that isn't part of the public API,
# and therefore shouldn't show up in the documentation, import it with a private
# name:
# from yggdrasil_decision_forests.port.python import submodule as _submodule
# from ydf import submodule as _submodule

# Core
from ydf import version as _version
from ydf.dataset import dataset as _dataset
from ydf.learner import generic_learner as _generic_learner
from ydf.learner import specialized_learners as _specialized_learners
Expand All @@ -32,7 +33,7 @@
from ydf.model import model_lib as _model_lib
from ydf.model import random_forest_model as _random_forest_model

__version__ = "0.0.2"
__version__ = _version.version

# Dataset
create_vertical_dataset = _dataset.create_vertical_dataset
Expand Down
10 changes: 10 additions & 0 deletions yggdrasil_decision_forests/port/python/ydf/api_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,16 @@ def test_cross_validation(self):
evaluation = learner.cross_validation(pd_ds)
logging.info(evaluation)

def test_export_to_cc(self):
  """Exports the adult_binary_class_gbdt test model to C++ and logs the code."""
  model_dir = os.path.join(
      test_utils.ydf_test_data_path(), "model", "adult_binary_class_gbdt"
  )
  cpp_code = ydf.load_model(model_dir).to_cpp()
  logging.info(
      "Copy the following in a .h file to run the model in C++:\n%s",
      cpp_code,
  )


if __name__ == "__main__":
absltest.main()
3 changes: 2 additions & 1 deletion yggdrasil_decision_forests/port/python/ydf/cc/ydf.pyi
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Optional, TypeVar
from typing import Optional, TypeVar, List

# pylint: disable=g-wrong-blank-lines

Expand Down Expand Up @@ -69,6 +69,7 @@ class GenericCCModel:
def data_spec(self) -> data_spec_pb2.DataSpecification: ...
def Save(self, directory: str, file_prefix: Optional[str]): ...
def Describe(self, full_details: bool) -> str: ...
def input_features(self) -> List[int]: ...

class DecisionForestCCModel(GenericCCModel):
def num_trees(self) -> int: ...
Expand Down
56 changes: 55 additions & 1 deletion yggdrasil_decision_forests/port/python/ydf/model/BUILD
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Model bindings for PYDF
# pytype test and library
# pytype binary, test, library
load("@pybind11_bazel//:build_defs.bzl", "pybind_library")
load("@ydf_cc//yggdrasil_decision_forests/utils:compile.bzl", "cc_library_ydf")

Expand Down Expand Up @@ -71,6 +71,7 @@ py_library(
srcs = ["generic_model.py"],
deps = [
":analysis",
":template_cpp_export",
# absl/logging dep,
# numpy dep,
"@ydf_cc//yggdrasil_decision_forests/metric:metric_py_proto",
Expand Down Expand Up @@ -129,6 +130,15 @@ py_library(
],
)

# Python module producing the C++ export code of a model; it depends on the
# package version and on the data spec proto.
py_library(
name = "template_cpp_export",
srcs = ["template_cpp_export.py"],
deps = [
"//ydf:version",
"@ydf_cc//yggdrasil_decision_forests/dataset:data_spec_py_proto",
],
)

# Tests
# =====

Expand All @@ -151,3 +161,47 @@ py_test(
"//ydf/utils:test_utils",
],
)

# Compiles the header generated by :export_cc_generate_lib together with
# export_cc_run_test.cc and runs it against the test data.
# Tagged "manual": the test is not picked up by wildcard target patterns and
# must be requested explicitly.
cc_test(
name = "export_cc_run_test",
srcs = [
"export_cc_generated_lib.h",
"export_cc_run_test.cc",
],
data = [
":export_cc_generate_lib",
"@ydf_cc//yggdrasil_decision_forests/test_data",
],
tags = ["manual"],
deps = [
"@com_google_absl//absl/status",
"@com_google_absl//absl/status:statusor",
"@com_google_absl//absl/strings",
"@com_google_googletest//:gtest_main",
"@ydf_cc//yggdrasil_decision_forests/api:serving",
"@ydf_cc//yggdrasil_decision_forests/utils:filesystem",
"@ydf_cc//yggdrasil_decision_forests/utils:logging",
"@ydf_cc//yggdrasil_decision_forests/utils:test",
],
)

# Runs :export_cc_generator on the adult_binary_class_gbdt test model to
# produce export_cc_generated_lib.h, the header compiled by export_cc_run_test.
# NOTE(review): --input_model is a hard-coded "third_party/..." path while the
# model files are declared through the "@ydf_cc//" label in srcs; confirm this
# path resolves in every workspace layout where the rule is expected to run.
genrule(
name = "export_cc_generate_lib",
srcs = ["@ydf_cc//yggdrasil_decision_forests/test_data"],
outs = ["export_cc_generated_lib.h"],
cmd = "$(location export_cc_generator) --input_model=third_party/yggdrasil_decision_forests/test_data/model/adult_binary_class_gbdt --output_code=$@",
tags = ["manual"],
tools = [":export_cc_generator"],
)

# Command-line tool that loads a model and writes its C++ export to a file.
# Used as the tool of the :export_cc_generate_lib genrule.
py_binary(
name = "export_cc_generator",
srcs = ["export_cc_generator.py"],
python_version = "PY3",
deps = [
# absl:app dep,
# absl/flags dep,
# absl/logging dep,
"//ydf:api",
],
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Copyright 2022 Google LLC.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Generates the c++ code to run a model for a unit-test."""


from collections.abc import Sequence

from absl import app
from absl import flags
from absl import logging

import ydf

# Path of the model to load; forwarded to process() as `input_model`.
# Defaults to None when the flag is not given on the command line.
_INPUT_MODEL = flags.DEFINE_string("input_model", None, "Path to input model")

# Path of the file receiving the generated C++ code; forwarded to process() as
# `output_code`. Defaults to None when the flag is not given.
_OUTPUT_CODE = flags.DEFINE_string(
"output_code", None, "Path to generated c++ file."
)


def process(input_model: str, output_code: str) -> None:
  """Loads a model and writes its C++ export to a file.

  The model is exported with the key "123", which the C++ unit test relies on
  to find the generated code.

  Args:
    input_model: Path of the model, as accepted by `ydf.load_model`.
    output_code: Path of the file that receives the generated C++ code. The
      file is opened in text write mode.
  """
  logging.info(
      "Loading model %s and generating cc code in %s", input_model, output_code
  )

  loaded_model = ydf.load_model(input_model)
  with open(output_code, "w") as output_file:
    cpp_code = loaded_model.to_cpp("123")
    output_file.write(cpp_code)


def main(argv: Sequence[str]) -> None:
  """Entry point: rejects positional arguments, then runs the export.

  Args:
    argv: Command-line arguments left after flag parsing; only the program
      name is expected.

  Raises:
    app.UsageError: If more than one element remains in `argv`.
  """
  has_positional_args = len(argv) > 1
  if has_positional_args:
    raise app.UsageError("Too many command-line arguments.")
  process(input_model=_INPUT_MODEL.value, output_code=_OUTPUT_CODE.value)


if __name__ == "__main__":
  # Both flags default to None. Require them up front so that a missing flag
  # is reported as a usage error at flag-parsing time instead of None being
  # passed to ydf.load_model() / open() inside process().
  flags.mark_flags_as_required(["input_model", "output_code"])
  app.run(main)
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
/*
* Copyright 2022 Google LLC.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <string>

#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "ydf/model/export_cc_generated_lib.h"
#include "yggdrasil_decision_forests/utils/filesystem.h"
#include "yggdrasil_decision_forests/utils/logging.h"
#include "yggdrasil_decision_forests/utils/test.h"

namespace yggdrasil_decision_forests {
namespace {

// Returns the path of the YDF test data directory, located under the test
// data root.
std::string TestDataDir() {
  const auto data_root = test::DataRootDirectory();
  return file::JoinPath(data_root, "yggdrasil_decision_forests/test_data");
}

// Loads the "adult_binary_class_gbdt" test model through the generated
// library (exported with the key "123") and logs every prediction returned by
// Predict().
TEST(RunModel, Base) {
  const std::string model_path =
      file::JoinPath(TestDataDir(), "model", "adult_binary_class_gbdt");
  const auto model = exported_model_123::Load(model_path);
  ASSERT_OK(model.status());

  const auto predictions = model->Predict();

  YDF_LOG(INFO) << "Predictions:";
  for (const float prediction : predictions) {
    YDF_LOG(INFO) << prediction;
  }
}

} // namespace
} // namespace yggdrasil_decision_forests
35 changes: 35 additions & 0 deletions yggdrasil_decision_forests/port/python/ydf/model/generic_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from ydf.dataset import dataset
from ydf.metric import metric
from ydf.model import analysis
from ydf.model import template_cpp_export
from yggdrasil_decision_forests.utils import model_analysis_pb2

# TODO: Allow a simpler input type (e.g. string)
Expand Down Expand Up @@ -284,5 +285,39 @@ def analyze(
analysis_proto = self._model.Analyze(ds._dataset, options_proto) # pylint: disable=protected-access
return analysis.Analysis(analysis_proto, options_proto)

def to_cpp(self, key: str = "my_model") -> str:
  """Generates the content of a .h file to run the model in C++.

  How to use this function:

  1. Copy the output of this function in a new .h file.
       open("model.h", "w").write(model.to_cpp())

  2. If you use Bazel/Blaze, create a rule with the dependencies:
       //third_party/absl/status:statusor
       //third_party/absl/strings
       //external/ydf_cc/yggdrasil_decision_forests/api:serving

  3. In your C++ code, include the .h file and call the model. For example,
     for a model exported with key="123":
       // Load the model (to do only once).
       namespace ydf = yggdrasil_decision_forests;
       const auto model = ydf::exported_model_123::Load(<path to model>);
       // Check model.status() before using the model.
       // Run the model
       const auto predictions = model->Predict();

  4. The generated "Predict" function takes no inputs. Instead, it fills the
     input features with placeholder values. Therefore, you will want to add
     your input as arguments to the "Predict" function, and use it to
     populate the "examples->Set..." section accordingly.

  5. (Bonus) To further speed-up inference speed, you can pre-allocate and
     reuse the "examples" and "predictions" for each model running threads.

  This documentation is also available in the header of the generated content
  for more details.

  Args:
    key: Name of the model. Used to define the c++ namespace of the model.

  Returns:
    The C++ code, as a string.
  """
  data_spec = self._model.data_spec()
  input_features = self._model.input_features()
  return template_cpp_export.template(key, data_spec, input_features)


ModelType = TypeVar("ModelType", bound=GenericModel)
3 changes: 2 additions & 1 deletion yggdrasil_decision_forests/port/python/ydf/model/model.cc
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,8 @@ void init_model(py::module_& m) {
.def("name", &GenericCCModel::name)
.def("task", &GenericCCModel::task)
.def("data_spec", &GenericCCModel::data_spec)
.def("Describe", &GenericCCModel::Describe, py::arg("full_details"));
.def("Describe", &GenericCCModel::Describe, py::arg("full_details"))
.def("input_features", &GenericCCModel::input_features);

py::class_<DecisionForestCCModel,
/*parent class*/ GenericCCModel>(m, "DecisionForestCCModel")
Expand Down
Loading

0 comments on commit 46c961c

Please sign in to comment.