Skip to content

Commit

Permalink
Merge pull request #37 from acalejos/plotting
Browse files Browse the repository at this point in the history
Draft: Plotting
  • Loading branch information
acalejos authored Jan 27, 2024
2 parents 9649af0 + cf10353 commit 3321105
Show file tree
Hide file tree
Showing 18 changed files with 2,545 additions and 32 deletions.
6 changes: 4 additions & 2 deletions .github/workflows/precompile.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
name: precompile

on: push
on:
- push
- workflow_dispatch

jobs:
linux:
Expand Down Expand Up @@ -52,7 +54,7 @@ jobs:
MIX_ENV: "prod"
strategy:
matrix:
runner: ["macos-latest", "self-hosted"]
runner: ["macos-latest", "exgboost-m2-runner"]
otp: ["25.0", "26.0"]
elixir: ["1.14.5"]
steps:
Expand Down
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,5 @@ erl_crash.dump
.elixir_ls/
.tool-versions
.vscode/
checksum.exs
checksum.exs
.DS_Store
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ $(XGBOOST_LIB_DIR_FLAG):
git fetch --depth 1 --recurse-submodules origin $(XGBOOST_GIT_REV) && \
git checkout FETCH_HEAD && \
git submodule update --init --recursive && \
sed 's|learner_parameters\["generic_param"\] = ToJson(ctx_);|&\nlearner_parameters\["default_metric"\] = String(obj_->DefaultEvalMetric());|' src/learner.cc > src/learner.cc.tmp && mv src/learner.cc.tmp src/learner.cc && \
cmake -DCMAKE_INSTALL_PREFIX=$(XGBOOST_LIB_DIR) -B build . $(CMAKE_FLAGS) && \
make -C build -j1 install
touch $(XGBOOST_LIB_DIR_FLAG)
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ billions of examples.
```elixir
def deps do
[
{:exgboost, "~> 0.3"}
{:exgboost, "~> 0.5"}
]
end
```
Expand Down
76 changes: 73 additions & 3 deletions lib/exgboost.ex
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ defmodule EXGBoost do
```elixir
def deps do
[
{:exgboost, "~> 0.4"}
{:exgboost, "~> 0.5"}
]
end
```
Expand Down Expand Up @@ -92,7 +92,7 @@ defmodule EXGBoost do
preds = EXGBoost.train(X, y) |> EXGBoost.predict(X)
```
## Serliaztion
## Serialization
A Booster can be serialized to a file using `EXGBoost.write_*` and loaded from a file
using `EXGBoost.read_*`. The file format can be specified using the `:format` option
Expand All @@ -113,6 +113,34 @@ defmodule EXGBoost do
- `config` - Save the configuration only.
- `weights` - Save the model parameters only. Use this when you want to save the model to a format that can be ingested by other XGBoost APIs.
- `model` - Save both the model parameters and the configuration.
## Plotting
`EXGBoost.plot_tree/2` is the primary entry point for plotting a tree from a trained model.
It accepts an `EXGBoost.Booster` struct (which is the output of `EXGBoost.train/2`).
`EXGBoost.plot_tree/2` returns a VegaLite spec that can be rendered in a notebook or saved to a file.
`EXGBoost.plot_tree/2` also accepts a keyword list of options that can be used to configure the plotting process.
See `EXGBoost.Plotting` for more detail on plotting.
You can see available styles by running `EXGBoost.Plotting.get_styles()` or refer to the `EXGBoost.Plotting.Styles`
documentation for a gallery of the styles.
## Kino & Livebook Integration
`EXGBoost` integrates with [Kino](https://hexdocs.pm/kino/Kino.html) and [Livebook](https://livebook.dev/)
to provide a rich interactive experience for data scientists.
EXGBoost implements the `Kino.Render` protocol for `EXGBoost.Booster` structs. This allows you to render
a Booster in a Livebook notebook. Under the hood, `EXGBoost` uses [Vega-Lite](https://vega.github.io/vega-lite/)
and [Kino Vega-Lite](https://hexdocs.pm/kino_vega_lite/Kino.VegaLite.html) to render the Booster.
See the [`Plotting in EXGBoost`](notebooks/plotting.livemd) Notebook for an example of how to use `EXGBoost` with `Kino` and `Livebook`.
## Examples
See the example Notebooks in the left sidebar (under the `Pages` tab) for more examples and tutorials
on how to use EXGBoost.
"""

alias EXGBoost.ArrayInterface
Expand All @@ -121,13 +149,15 @@ defmodule EXGBoost do
alias EXGBoost.DMatrix
alias EXGBoost.ProxyDMatrix
alias EXGBoost.Training
alias EXGBoost.Plotting

@doc """
Reports the build information of the underlying xgboost library.

The NIF result is unwrapped with `Internal.unwrap!/1` and then decoded from JSON
with `Jason.decode!/1`; the decoded map is returned.
"""
@spec xgboost_build_info() :: map()
@doc type: :system
def xgboost_build_info do
  EXGBoost.NIF.xgboost_build_info()
  |> Internal.unwrap!()
  |> Jason.decode!()
end

Expand All @@ -137,6 +167,7 @@ defmodule EXGBoost do
Returns a 3-tuple in the form of `{major, minor, patch}`.
"""
@spec xgboost_version() :: {integer(), integer(), integer()} | {:error, String.t()}
@doc type: :system
# Asks the NIF for the version and hands the raw result to `Internal.unwrap!/1`.
def xgboost_version do
  nif_result = EXGBoost.NIF.xgboost_version()
  Internal.unwrap!(nif_result)
end

@doc """
Expand All @@ -147,6 +178,7 @@ defmodule EXGBoost do
for the full list of parameters supported in the global configuration.
"""
@spec set_config(map()) :: :ok | {:error, String.t()}
@doc type: :system
def set_config(%{} = config) do
config = EXGBoost.Parameters.validate_global!(config)
EXGBoost.NIF.set_global_config(Jason.encode!(config)) |> Internal.unwrap!()
Expand All @@ -160,6 +192,7 @@ defmodule EXGBoost do
for the full list of parameters supported in the global configuration.
"""
@spec get_config() :: map()
@doc type: :system
# Fetches the global configuration from the NIF as JSON text and decodes it.
def get_config do
  raw_json = Internal.unwrap!(EXGBoost.NIF.get_global_config())
  Jason.decode!(raw_json)
end
Expand Down Expand Up @@ -208,10 +241,11 @@ defmodule EXGBoost do
* `opts` - Refer to `EXGBoost.Parameters` for the full list of options.
"""
@spec train(Nx.Tensor.t(), Nx.Tensor.t(), Keyword.t()) :: EXGBoost.Booster.t()
@doc type: :train_pred
def train(x, y, opts \\ []) do
  x = Nx.concatenate(x)
  y = Nx.concatenate(y)

  # Copy (do not split off) the DMatrix feature options. A preceding
  # `Keyword.split/2` would strip these keys from `opts`, leaving this `take`
  # with nothing to find; `opts` must also reach `Training.train/2` unchanged.
  dmat_opts = Keyword.take(opts, Internal.dmatrix_feature_opts())

  # `:format` defaults to `:dense` only when the caller did not choose one.
  dmat = DMatrix.from_tensor(x, y, Keyword.put_new(dmat_opts, :format, :dense))
  Training.train(dmat, opts)
end
Expand Down Expand Up @@ -272,6 +306,7 @@ defmodule EXGBoost do
Returns an Nx.Tensor containing the predictions.
"""
@doc type: :train_pred
def predict(%Booster{} = bst, x, opts \\ []) do
x = Nx.concatenate(x)
{dmat_opts, opts} = Keyword.split(opts, Internal.dmatrix_feature_opts())
Expand Down Expand Up @@ -302,6 +337,7 @@ defmodule EXGBoost do
Returns an Nx.Tensor containing the predictions.
"""
@doc type: :train_pred
def inplace_predict(%Booster{} = boostr, data, opts \\ []) do
opts =
Keyword.validate!(opts,
Expand Down Expand Up @@ -428,6 +464,7 @@ defmodule EXGBoost do
## Options
#{NimbleOptions.docs(@write_schema)}
"""
@doc type: :serialization
@spec write_model(Booster.t(), String.t()) :: :ok | {:error, String.t()}
def write_model(%Booster{} = booster, path, opts \\ []) do
opts = NimbleOptions.validate!(opts, @write_schema)
Expand All @@ -437,6 +474,7 @@ defmodule EXGBoost do
@doc """
Read a model from a file and return the Booster.
"""
@doc type: :serialization
@spec read_model(String.t()) :: EXGBoost.Booster.t()
def read_model(path) do
EXGBoost.Booster.load(path, deserialize: :model)
Expand All @@ -449,6 +487,7 @@ defmodule EXGBoost do
#{NimbleOptions.docs(@dump_schema)}
"""
@spec dump_model(Booster.t()) :: binary()
@doc type: :serialization
def dump_model(%Booster{} = booster, opts \\ []) do
opts = NimbleOptions.validate!(opts, @dump_schema)
EXGBoost.Booster.save(booster, opts ++ [serialize: :model, to: :buffer])
Expand All @@ -458,6 +497,7 @@ defmodule EXGBoost do
Read a model from a buffer and return the Booster.
"""
@spec load_model(binary()) :: EXGBoost.Booster.t()
@doc type: :serialization
# Thin wrapper: delegates to `EXGBoost.Booster.load/2` with the `:model`
# deserialization mode and a `:buffer` source.
def load_model(buffer),
  do: EXGBoost.Booster.load(buffer, deserialize: :model, from: :buffer)
Expand All @@ -469,6 +509,7 @@ defmodule EXGBoost do
#{NimbleOptions.docs(@write_schema)}
"""
@spec write_config(Booster.t(), String.t()) :: :ok | {:error, String.t()}
@doc type: :serialization
def write_config(%Booster{} = booster, path, opts \\ []) do
opts = NimbleOptions.validate!(opts, @write_schema)
EXGBoost.Booster.save(booster, opts ++ [path: path, serialize: :config])
Expand All @@ -481,6 +522,7 @@ defmodule EXGBoost do
#{NimbleOptions.docs(@dump_schema)}
"""
@spec dump_config(Booster.t()) :: binary()
@doc type: :serialization
def dump_config(%Booster{} = booster, opts \\ []) do
opts = NimbleOptions.validate!(opts, @dump_schema)
EXGBoost.Booster.save(booster, opts ++ [serialize: :config, to: :buffer])
Expand All @@ -493,6 +535,7 @@ defmodule EXGBoost do
#{NimbleOptions.docs(@load_schema)}
"""
@spec read_config(String.t()) :: EXGBoost.Booster.t()
@doc type: :serialization
def read_config(path, opts \\ []) do
opts = NimbleOptions.validate!(opts, @load_schema)
EXGBoost.Booster.load(path, opts ++ [deserialize: :config])
Expand All @@ -505,6 +548,7 @@ defmodule EXGBoost do
#{NimbleOptions.docs(@load_schema)}
"""
@spec load_config(binary()) :: EXGBoost.Booster.t()
@doc type: :serialization
def load_config(buffer, opts \\ []) do
opts = NimbleOptions.validate!(opts, @load_schema)
EXGBoost.Booster.load(buffer, opts ++ [deserialize: :config, from: :buffer])
Expand All @@ -517,6 +561,7 @@ defmodule EXGBoost do
#{NimbleOptions.docs(@write_schema)}
"""
@spec write_weights(Booster.t(), String.t()) :: :ok | {:error, String.t()}
@doc type: :serialization
def write_weights(%Booster{} = booster, path, opts \\ []) do
opts = NimbleOptions.validate!(opts, @write_schema)
EXGBoost.Booster.save(booster, opts ++ [path: path, serialize: :weights])
Expand All @@ -529,6 +574,7 @@ defmodule EXGBoost do
#{NimbleOptions.docs(@dump_schema)}
"""
@spec dump_weights(Booster.t()) :: binary()
@doc type: :serialization
def dump_weights(%Booster{} = booster, opts \\ []) do
opts = NimbleOptions.validate!(opts, @dump_schema)
EXGBoost.Booster.save(booster, opts ++ [serialize: :weights, to: :buffer])
Expand All @@ -538,6 +584,7 @@ defmodule EXGBoost do
Read a model's trained parameters from a file and return the Booster.
"""
@spec read_weights(String.t()) :: EXGBoost.Booster.t()
@doc type: :serialization
# Thin wrapper: delegates to `EXGBoost.Booster.load/2` with the `:weights`
# deserialization mode.
def read_weights(path),
  do: EXGBoost.Booster.load(path, deserialize: :weights)
Expand All @@ -546,7 +593,30 @@ defmodule EXGBoost do
Read a model's trained parameters from a buffer and return the Booster.
"""
@spec load_weights(binary()) :: EXGBoost.Booster.t()
@doc type: :serialization
# Thin wrapper: delegates to `EXGBoost.Booster.load/2` with the `:weights`
# deserialization mode and a `:buffer` source.
def load_weights(buffer),
  do: EXGBoost.Booster.load(buffer, deserialize: :weights, from: :buffer)

@doc """
Plot a tree from a Booster model and, optionally, save it to a file.

When `:path` is given, the plot is exported with `VegaLite.Export.save!/3` and the
result of that call is returned. When `:path` is omitted (or `nil`), the VegaLite spec
is returned instead.

## Options
* `:path` - the path to save the graphic to. If not provided, the graphic is returned as a VegaLite spec.
* `:format` - the format to export the graphic as, must be either of: `:json`, `:html`, `:png`, `:svg`, `:pdf`. By default the format is inferred from the file extension. Only used when `:path` is given.
* `:local_npm_prefix` - a relative path pointing to a local npm project directory where the necessary npm packages are installed. For instance, in Phoenix projects you may want to pass local_npm_prefix: "assets". By default the npm packages are searched for in the current directory and globally. Only used when `:path` is given.

Any other option is passed through unchanged to `EXGBoost.Plotting.plot/2`.
See `EXGBoost.Plotting` for more information.
"""
@doc type: :plotting
def plot_tree(booster, opts \\ []) do
  {path, opts} = Keyword.pop(opts, :path)

  # Export-only options must not leak into the plotting options.
  {save_opts, plot_opts} = Keyword.split(opts, [:format, :local_npm_prefix])
  vega = Plotting.plot(booster, plot_opts)

  case path do
    nil -> vega
    _ -> VegaLite.Export.save!(vega, path, save_opts)
  end
end
end
12 changes: 12 additions & 0 deletions lib/exgboost/booster.ex
Original file line number Diff line number Diff line change
Expand Up @@ -163,9 +163,15 @@ defmodule EXGBoost.Booster do
def booster(dmats, opts \\ [])

# Builds a new Booster from a list of DMatrix structs.
def booster(dmats, opts) when is_list(dmats) do
  # String feature-info options are set through a dedicated NIF call, so they are
  # separated before the remaining options go through parameter validation.
  {str_opts, opts} = Keyword.split(opts, Internal.dmatrix_str_feature_opts())
  opts = EXGBoost.Parameters.validate!(opts)
  refs = Enum.map(dmats, & &1.ref)
  booster_ref = EXGBoost.NIF.booster_create(refs) |> Internal.unwrap!()

  # Unwrap each result, as the other NIF calls in this function do, so that a
  # failed feature-info assignment is not silently discarded.
  Enum.each(str_opts, fn {key, value} ->
    booster_ref
    |> EXGBoost.NIF.booster_set_str_feature_info(Atom.to_string(key), value)
    |> Internal.unwrap!()
  end)

  set_params(%__MODULE__{ref: booster_ref}, opts)
end

Expand All @@ -174,9 +180,15 @@ defmodule EXGBoost.Booster do
end

# Builds a new Booster from an existing one by serializing it to a buffer and
# deserializing that buffer into a fresh reference.
def booster(%__MODULE__{} = bst, opts) do
  # String feature-info options are set through a dedicated NIF call, so they are
  # separated before the remaining options go through parameter validation.
  {str_opts, opts} = Keyword.split(opts, Internal.dmatrix_str_feature_opts())
  opts = EXGBoost.Parameters.validate!(opts)
  boostr_bytes = EXGBoost.NIF.booster_serialize_to_buffer(bst.ref) |> Internal.unwrap!()
  booster_ref = EXGBoost.NIF.booster_deserialize_from_buffer(boostr_bytes) |> Internal.unwrap!()

  # Unwrap each result, as the other NIF calls in this function do, so that a
  # failed feature-info assignment is not silently discarded.
  Enum.each(str_opts, fn {key, value} ->
    booster_ref
    |> EXGBoost.NIF.booster_set_str_feature_info(Atom.to_string(key), value)
    |> Internal.unwrap!()
  end)

  set_params(%__MODULE__{ref: booster_ref}, opts)
end

Expand Down
1 change: 0 additions & 1 deletion lib/exgboost/nif.ex
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,6 @@ defmodule EXGBoost.NIF do
def dmatrix_create_from_file(_file_uri, _silent),
do: :erlang.nif_error(:not_implemented)

@since "0.4.0"
def dmatrix_create_from_uri(_config), do: :erlang.nif_error(:not_implemented)

@spec dmatrix_create_from_mat(binary, integer(), integer(), float()) ::
Expand Down
Loading

0 comments on commit 3321105

Please sign in to comment.