Skip to content

Commit

Permalink
Merge branch 'main' into viviannguyen/do-not-return-coords-for-dense
Browse files Browse the repository at this point in the history
  • Loading branch information
johnkerl committed May 2, 2024
2 parents 7e44da5 + 65e6baf commit 5ff4021
Show file tree
Hide file tree
Showing 56 changed files with 2,788 additions and 842 deletions.
21 changes: 8 additions & 13 deletions .github/workflows/r-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,6 @@ jobs:
- name: Bootstrap
run: cd apis/r && tools/r-ci.sh bootstrap

- name: Set pkgType to binary (macOS)
if: ${{ matrix.os == 'macOS-latest' }}
run: cat("\noptions(pkgType = 'binary')\n", file = "~/.Rprofile", append = TRUE)
shell: Rscript {0}

- name: Install BioConductor package SingleCellExperiment
run: cd apis/r && tools/r-ci.sh install_bioc SingleCellExperiment

Expand Down Expand Up @@ -89,12 +84,12 @@ jobs:
- name: Dependencies
run: cd apis/r && tools/r-ci.sh install_all

- name: Install dataset packages from source (macOS)
if: ${{ matrix.os == 'macOS-latest' }}
run: cd apis/r && _CI_PKG_TYPE_=both _CI_USE_BIOC_=true Rscript tools/install_missing_deps.R
# - name: Install dataset packages from source (macOS)
# if: ${{ matrix.os == 'macOS-latest' }}
# run: cd apis/r && _CI_PKG_TYPE_=both _CI_USE_BIOC_=true Rscript tools/install_missing_deps.R

- name: CMake
uses: lukka/get-cmake@latest
# - name: CMake
# uses: lukka/get-cmake@latest

#- name: MkVars
# run: mkdir ~/.R && echo "CXX17FLAGS=-Wno-deprecated-declarations -Wno-deprecated" > ~/.R/Makevars
Expand All @@ -106,8 +101,8 @@ jobs:
# if: ${{ matrix.os == 'ubuntu-latest' }}
# run: sudo ldconfig
#
- name: Update Packages
run: Rscript -e 'update.packages(ask=FALSE)'
# - name: Update Packages
# run: Rscript -e 'update.packages(ask=FALSE)'

# - name: Build Package
# run: cd apis/r && R CMD build --no-build-vignettes --no-manual .
Expand All @@ -126,7 +121,7 @@ jobs:
run: cd apis/r && tools/r-ci.sh run_tests

- name: Coverage
if: ${{ matrix.os == 'ubuntu-latest' && matrix.covr == 'yes' }}
if: ${{ matrix.os == 'ubuntu-latest' && matrix.covr == 'yes' && github.event_name == 'workflow_dispatch' }}
run: apis/r/tools/r-ci.sh coverage

- name: View Logs
Expand Down
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ repos:
- id: mypy
additional_dependencies:
- "pandas-stubs==1.5.3.230214"
- "somacore==1.0.10"
- "somacore==1.0.11"
- "types-setuptools==67.4.0.3"
args: ["--config-file=apis/python/pyproject.toml", "apis/python/src", "apis/python/devtools"]
pass_filenames: false
7 changes: 5 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,15 @@ help:
# install
# -------------------------------------------------------------------

# set default variable values, if non-null
# Set default variable values, used only when the caller has not already set them
# * build=Debug creates binary artifacts with symbols, e.g. for gdb
# * cmake_verbose=true creates Makefiles that produce full compile lines when executed
build ?= Release
cmake_verbose ?= false

.PHONY: install
install: clean
@./scripts/bld --prefix=${prefix} --tiledb=${tiledb} --build=${build}
@./scripts/bld --prefix=${prefix} --tiledb=${tiledb} --build=${build} --cmake-verbose=${cmake_verbose}
@TILEDB_PATH=${tiledb} pip install -v -e apis/python

.PHONY: r-build
Expand Down
3 changes: 2 additions & 1 deletion apis/python/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,7 +329,8 @@ def run(self):
"pyarrow>=9.0.0; platform_system!='Darwin'",
"scanpy>=1.9.2",
"scipy",
"somacore==1.0.10",
# Note: the somacore version is in .pre-commit-config.yaml too
"somacore==1.0.11",
"tiledb~=0.28.0",
"typing-extensions", # Note "-" even though `import typing_extensions`
],
Expand Down
24 changes: 12 additions & 12 deletions apis/python/src/tiledbsoma/_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,16 +230,17 @@ def create(
f"if domain is specified, it must have the same length as index_column_names; got {ndom} != {nidx}"
)

domains = []
extents = []
index_column_schema = []
index_column_data = {}

for index_column_name, slot_domain in zip(index_column_names, domain):
pa_type = schema.field(index_column_name).type
pa_field = schema.field(index_column_name)
dtype = _arrow_types.tiledb_type_from_arrow_type(
pa_type, is_indexed_column=True
pa_field.type, is_indexed_column=True
)

slot_domain = _fill_out_slot_domain(
slot_domain, index_column_name, pa_type, dtype
slot_domain, index_column_name, pa_field.type, dtype
)

extent = _find_extent_for_domain(
Expand All @@ -249,11 +250,12 @@ def create(
slot_domain,
)

domains.append(pa.array(slot_domain, type=pa_type))
extents.append(pa.array([extent], type=pa_type))
index_column_schema.append(pa_field)
index_column_data[pa_field.name] = [*slot_domain, extent]

domains = pa.StructArray.from_arrays(domains, names=index_column_names)
extents = pa.StructArray.from_arrays(extents, names=index_column_names)
index_column_info = pa.RecordBatch.from_pydict(
index_column_data, schema=pa.schema(index_column_schema)
)

plt_cfg = None
if platform_config:
Expand Down Expand Up @@ -282,9 +284,7 @@ def create(
clib.SOMADataFrame.create(
uri,
schema=schema,
index_column_names=index_column_names,
domains=domains,
extents=extents,
index_column_info=index_column_info,
ctx=context.native_context,
platform_config=plt_cfg,
timestamp=(0, timestamp_ms),
Expand Down
4 changes: 2 additions & 2 deletions apis/python/src/tiledbsoma/_exception.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ class NotCreateableError(SOMAError):
pass


def is_not_createable_error(e: tiledb.TileDBError) -> bool:
def is_not_createable_error(e: Union[SOMAError, tiledb.TileDBError]) -> bool:
"""Given a TileDBError, return true if it indicates the object cannot be created
Lifecycle: experimental
Expand Down Expand Up @@ -132,7 +132,7 @@ def is_not_createable_error(e: tiledb.TileDBError) -> bool:
return False


def is_duplicate_group_key_error(e: tiledb.TileDBError) -> bool:
def is_duplicate_group_key_error(e: Union[SOMAError, tiledb.TileDBError]) -> bool:
"""Given a TileDBError, return true if it indicates a duplicate member
add request in a tiledb.Group.
Expand Down
30 changes: 13 additions & 17 deletions apis/python/src/tiledbsoma/soma_dataframe.cc
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,7 @@ void load_soma_dataframe(py::module& m) {
"create",
[](std::string_view uri,
py::object py_schema,
std::vector<std::string> index_columns_names,
py::object py_domains,
py::object py_extents,
py::object index_column_info,
std::shared_ptr<SOMAContext> context,
std::optional<PlatformConfig> platform_config,
std::optional<std::pair<uint64_t, uint64_t>> timestamp) {
Expand All @@ -80,22 +78,22 @@ void load_soma_dataframe(py::module& m) {
}
}

ArrowArray domains;
uintptr_t domains_ptr = (uintptr_t)(&domains);
py_domains.attr("_export_to_c")(domains_ptr);

ArrowArray extents;
uintptr_t extents_ptr = (uintptr_t)(&extents);
py_extents.attr("_export_to_c")(extents_ptr);
ArrowSchema index_column_schema;
ArrowArray index_column_array;
uintptr_t
index_column_schema_ptr = (uintptr_t)(&index_column_schema);
uintptr_t
index_column_array_ptr = (uintptr_t)(&index_column_array);
index_column_info.attr("_export_to_c")(
index_column_array_ptr, index_column_schema_ptr);

try {
SOMADataFrame::create(
uri,
std::make_unique<ArrowSchema>(schema),
ColumnIndexInfo(
index_columns_names,
std::make_shared<ArrowArray>(domains),
std::make_shared<ArrowArray>(extents)),
ArrowTable(
std::make_unique<ArrowArray>(index_column_array),
std::make_unique<ArrowSchema>(index_column_schema)),
context,
platform_config,
timestamp);
Expand All @@ -108,9 +106,7 @@ void load_soma_dataframe(py::module& m) {
"uri"_a,
py::kw_only(),
"schema"_a,
"index_column_names"_a,
"domains"_a,
"extents"_a,
"index_column_info"_a,
"ctx"_a,
"platform_config"_a,
"timestamp"_a = py::none())
Expand Down
15 changes: 15 additions & 0 deletions apis/python/tests/test_experiment_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -388,6 +388,21 @@ def test_experiment_query_none(soma_experiment):
assert len(query.X("raw").tables().concat()) == 0


@pytest.mark.parametrize("n_obs,n_vars", [(1001, 99)])
def test_experiment_axis_query_with_none(soma_experiment):
    """Filter obs by value while explicitly passing ``var_query=None``."""
    expected_labels = ["3", "7", "38", "99"]
    obs_filter = soma.AxisQuery(value_filter=f"label in {expected_labels}")

    with soma.ExperimentAxisQuery(
        experiment=soma_experiment,
        measurement_name="RNA",
        obs_query=obs_filter,
        var_query=None,
    ) as query:
        # The row count is checked first, then the labels themselves.
        assert query.n_obs == len(expected_labels)
        returned_labels = query.obs().concat()["label"].to_pylist()
        assert returned_labels == expected_labels


@pytest.mark.parametrize("n_obs,n_vars,X_layer_names", [(1001, 99, ["A"])])
def test_joinid_caching(soma_experiment):
"""
Expand Down
2 changes: 1 addition & 1 deletion apis/r/DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ Description: Interface for working with 'TileDB'-based Stack of Matrices,
like those commonly used for single cell data analysis. It is documented at
<https://github.com/single-cell-data>; a formal specification available is at
<https://github.com/single-cell-data/SOMA/blob/main/abstract_specification.md>.
Version: 1.10.99
Version: 1.10.99.3
Authors@R: c(
person(given = "Aaron", family = "Wolen",
role = c("cre", "aut"), email = "[email protected]",
Expand Down
2 changes: 2 additions & 0 deletions apis/r/NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

S3method("[[",MappingBase)
S3method("[[<-",MappingBase)
S3method(.read_soma_joinids,SOMADataFrame)
S3method(.read_soma_joinids,SOMASparseNDArray)
S3method(as.list,CoordsStrider)
S3method(as.list,MappingBase)
S3method(iterators::nextElem,CoordsStrider)
Expand Down
2 changes: 2 additions & 0 deletions apis/r/NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
* Add support for reading `*m` and `*p` layers from `SOMAExperimentAxisQuery`
* Add support for blockwise iteration
* Make `reopen()` a public method for all `TileDBObjects`
* Add support for resume-mode in `write_soma()`
* Set the defaults for `TileDBCreateOptions` in `$initialize()` instead of in the accessors

# 1.7.0

Expand Down
Loading

0 comments on commit 5ff4021

Please sign in to comment.