
Commit b0a3994

Internal change
PiperOrigin-RevId: 406833837
achoum authored and copybara-github committed Nov 1, 2021
1 parent 9905823 commit b0a3994
Showing 14 changed files with 44 additions and 15 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
@@ -1,6 +1,6 @@
# Changelog

-## 0.1.6 - ???
+## 0.2.0 - 2021-10-29

### Features

3 changes: 2 additions & 1 deletion third_party/farmhash/workspace.bzl
@@ -4,7 +4,8 @@ load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")

def deps(prefix = ""):
http_archive(
name = "com_google_farmhash",
# The name should match TF's name for farmhash lib.
name = "farmhash_archive",
build_file = prefix + "//third_party/farmhash:farmhash.BUILD",
strip_prefix = "farmhash-master",
# Does not have any release.
3 changes: 2 additions & 1 deletion tools/build_binary_release.bat
@@ -32,4 +32,5 @@ copy configure\cli_readme.txt dist\README
.\%CLI%\evaluate.exe^
.\%CLI%\convert_dataset.exe^
.\%CLI%\benchmark_inference.exe^
-.\%CLI%\utils\synthetic_dataset.exe
+.\%CLI%\utils\synthetic_dataset.exe^
+.\%CLI%\distribute\implementations\grpc\grpc_worker_main.exe
14 changes: 13 additions & 1 deletion tools/build_binary_release.sh
@@ -25,7 +25,19 @@ cp -f configure/cli_readme.txt ${CLI}/README
cp -f documentation/cli.txt ${CLI}/

pushd ${CLI}
-zip cli_linux.zip README cli.txt train show_model show_dataspec predict infer_dataspec evaluate convert_dataset benchmark_inference utils/synthetic_dataset
+zip -j cli_linux.zip \
+README \
+cli.txt \
+train \
+show_model \
+show_dataspec \
+predict \
+infer_dataspec \
+evaluate \
+convert_dataset \
+benchmark_inference \
+utils/synthetic_dataset \
+../utils/distribute/implementations/grpc/grpc_worker_main
popd

mkdir -p dist
4 changes: 2 additions & 2 deletions yggdrasil_decision_forests/dataset/BUILD
@@ -57,7 +57,7 @@ cc_library_ydf(
"@com_google_absl//absl/status",
"@com_google_absl//absl/strings",
"@com_google_absl//absl/strings:str_format",
"@com_google_farmhash//:farmhash",
"@farmhash_archive//:farmhash",
"@org_tensorflow//tensorflow/core:protos_all_cc",
"//yggdrasil_decision_forests/utils:compatibility",
"//yggdrasil_decision_forests/utils:logging",
@@ -371,7 +371,7 @@ cc_library_ydf(
":vertical_dataset",
"@com_google_absl//absl/status",
"@com_google_absl//absl/strings",
"@com_google_farmhash//:farmhash",
"@farmhash_archive//:farmhash",
"@org_tensorflow//tensorflow/core/example:example_protos_cc",
"//yggdrasil_decision_forests/utils:csv",
"//yggdrasil_decision_forests/utils:filesystem",
Changes in another file (path not shown)
@@ -84,7 +84,7 @@ absl::Status IntegerColumnWriter::Open(absl::string_view path,
int64_t max_value) {
num_bytes_ = NumBytes(max_value);
max_value_ = max_value;
-path_ = path;
+path_ = std::string(path);
return file_.Open(path);
}

@@ -249,7 +249,7 @@ template <typename Value>
absl::Status ShardedIntegerColumnReader<Value>::Open(
absl::string_view base_path, int64_t max_value, int max_num_values,
int begin_shard_idx, int end_shard_idx) {
-base_path_ = base_path;
+base_path_ = std::string(base_path);
max_value_ = max_value;
max_num_values_ = max_num_values;
end_shard_idx_ = end_shard_idx;
@@ -410,7 +410,7 @@ template class InMemoryIntegerColumnReaderFactory<int32_t>;
template class InMemoryIntegerColumnReaderFactory<int64_t>;

absl::Status FloatColumnWriter::Open(absl::string_view path) {
-path_ = path;
+path_ = std::string(path);
return file_.Open(path);
}

@@ -470,7 +470,7 @@ absl::Status ShardedFloatColumnReader::Open(absl::string_view base_path,
int max_num_values,
int begin_shard_idx,
int end_shard_idx) {
-base_path_ = base_path;
+base_path_ = std::string(base_path);
max_num_values_ = max_num_values;
end_shard_idx_ = end_shard_idx;
current_shard_idx_ = begin_shard_idx;
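The `path_ = std::string(path);` edits above make the conversion from `absl::string_view` explicit. A minimal sketch of the pattern, with a hypothetical class name; when `absl::string_view` is not an alias of `std::string_view` (for example on pre-C++17 toolchains), there is no implicit conversion to `std::string`, so the plain assignment may not compile.

```cpp
#include <string>

#include "absl/strings/string_view.h"

// Hypothetical writer class, for illustration only.
class PathHolder {
 public:
  void Open(absl::string_view path) {
    // path_ = path;            // may not compile when absl::string_view is
    //                          // not an alias of std::string_view
    path_ = std::string(path);  // explicit construction, as in this commit
  }

 private:
  std::string path_;
};
```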
Changes in another file (path not shown)
@@ -269,7 +269,7 @@ absl::Status DatasetCacheReader::LoadInMemoryCache() {
num_columns);

const auto begin = absl::Now();
-std::atomic<size_t> memory_usage = 0;
+std::atomic<size_t> memory_usage{0};

{
absl::Status worker_status;
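The switch from `= 0` to `{0}` avoids copy-initializing the atomic. A minimal sketch of why this matters, assuming a pre-C++17 build mode is among the targets: `std::atomic` has a deleted copy constructor, and copy-initialization from a value only becomes well-formed with C++17's guaranteed copy elision, while direct brace initialization works in C++11 and later.

```cpp
#include <atomic>
#include <cstddef>

int main() {
  // std::atomic<std::size_t> memory_usage = 0;  // ill-formed before C++17
  std::atomic<std::size_t> memory_usage{0};      // portable direct-initialization
  memory_usage += 1024;
  return static_cast<int>(memory_usage.load());
}
```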
Changes in another file (path not shown)
@@ -32,6 +32,9 @@ namespace yggdrasil_decision_forests {
namespace model {
namespace distributed_decision_tree {
namespace dataset_cache {

+constexpr char CreateDatasetCacheWorker::kWorkerKey[];

namespace {
using Blob = distribute::Blob;

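The added `constexpr char CreateDatasetCacheWorker::kWorkerKey[];` line (and the matching ones for the other workers below) supplies an out-of-class definition for a static constexpr data member. A minimal sketch with hypothetical names; under C++14 the in-class declaration is not a definition, so ODR-using the member (for example taking its address or binding it to a string_view) without a namespace-scope definition can fail at link time. C++17 makes such members implicitly inline, so the extra definition is then redundant but harmless.

```cpp
// worker.h (hypothetical)
class ExampleWorker {
 public:
  // In-class declaration with an initializer.
  static constexpr char kWorkerKey[] = "EXAMPLE_WORKER";
};

// worker.cc (hypothetical)
// Required under C++14 if kWorkerKey is ODR-used; otherwise the linker may
// report "undefined reference to ExampleWorker::kWorkerKey".
constexpr char ExampleWorker::kWorkerKey[];
```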
Changes in another file (path not shown)
@@ -1102,7 +1102,7 @@ absl::Status EmitShareSplits(
}

absl::Status EmitEndIter(int iter_idx, distribute::AbstractManager* distribute,
-std::optional<Evaluation*> training_evaluation,
+absl::optional<Evaluation*> training_evaluation,
internal::Monitoring* monitoring) {
monitoring->BeginStage(internal::Monitoring::kEndIter);

@@ -1547,7 +1547,7 @@ void Monitoring::FindSplitWorkerReplyTime(int worker_idx,
LOG(INFO) << "\tWorker #" << worker_idx << " replied to FindSplits in "
<< delay;
}
-last_min_split_reply_times_.push_back(std::pair(worker_idx, delay));
+last_min_split_reply_times_.push_back({worker_idx, delay});
}

absl::string_view Monitoring::StageName(Monitoring::Stages stage) {
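Both edits in this hunk trade C++17-only constructs for portable equivalents: `absl::optional` aliases `std::optional` when C++17 is available and provides its own implementation otherwise, and the braced `{worker_idx, delay}` form does not rely on class template argument deduction for `std::pair`. A small sketch with hypothetical names and types:

```cpp
#include <utility>
#include <vector>

#include "absl/types/optional.h"

// absl::optional keeps this signature buildable under C++14 as well as C++17.
absl::optional<int> MaybeAnswer(bool ready) {
  if (!ready) return absl::nullopt;
  return 42;
}

void RecordReply(std::vector<std::pair<int, double>>* reply_times,
                 int worker_idx, double delay_seconds) {
  // std::pair(worker_idx, delay_seconds) needs class template argument
  // deduction (C++17); the braced form is deduced from the vector's value
  // type and compiles under older standards too.
  reply_times->push_back({worker_idx, delay_seconds});
}
```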
Changes in another file (path not shown)
@@ -339,7 +339,7 @@ absl::Status EmitShareSplits(
internal::Monitoring* monitoring);

absl::Status EmitEndIter(int iter_idx, distribute::AbstractManager* distribute,
-std::optional<Evaluation*> training_evaluation,
+absl::optional<Evaluation*> training_evaluation,
internal::Monitoring* monitoring);

absl::Status EmitRestoreCheckpoint(int iter_idx, int num_shards,
Changes in another file (path not shown)
@@ -26,6 +26,8 @@ namespace yggdrasil_decision_forests {
namespace model {
namespace distributed_gradient_boosted_trees {

+constexpr char DistributedGradientBoostedTreesWorker::kWorkerKey[];

absl::Status DistributedGradientBoostedTreesWorker::Setup(
distribute::Blob serialized_welcome) {
ASSIGN_OR_RETURN(welcome_, utils::ParseBinaryProto<proto::WorkerWelcome>(
Changes in another file (path not shown)
@@ -33,6 +33,8 @@ namespace {
using Blob = distribute::Blob;
}

+constexpr char GenericWorker::kWorkerKey[];

absl::Status GenericWorker::TrainModel(
const proto::Request::TrainModel& request,
proto::Result::TrainModel* result) {
Changes in another file (path not shown)
@@ -460,7 +460,10 @@ TEST_F(RandomForestOnAdult, MaximumSize) {
train_config_.set_maximum_model_size_in_memory_in_bytes(max_size);

TrainAndEvaluateModel();
-EXPECT_LT(model_->ModelSizeInBytes().value(), max_size);
+// Add an extra 2kB to help with the test flakiness.
+// Note: the model can be slightly larger than the
+// "set_maximum_model_size_in_memory_in_bytes" directive.
+EXPECT_LT(model_->ModelSizeInBytes().value(), max_size + 2 * 1024);

EXPECT_GT(metric::Accuracy(evaluation_), 0.840);
}
Changes in another file (path not shown)
@@ -20,6 +20,11 @@ all_proto_library(

cc_binary_ydf(
name = "grpc_worker_main",
deps = [":grpc_worker_lib_with_main"],
)

cc_library_ydf(
name = "grpc_worker_lib_with_main",
srcs = ["grpc_worker_main.cc"],
deps = [
":grpc_worker",
