voxel51 · jacobsela · Nov 25, 2024 · Oct 15, 2024 · Oct 17, 2024 · Oct 21, 2024
diff --git a/fiftyone/brain/__init__.py b/fiftyone/brain/__init__.py
@@ -703,3 +703,99 @@ def compute_exact_duplicates(
     return fbd.compute_exact_duplicates(
         samples, num_workers, skip_failures, progress
     )
+
+
+def compute_leaky_splits(
+    samples,
+    brain_key=None,
+    split_views=None,
+    split_field=None,
+    split_tags=None,
+    threshold=0.2,
+    similarity_brain_key=None,
+    embeddings_field=None,
+    model=None,
+    model_kwargs=None,
+    similarity_backend=None,
+    similarity_config_dict=None,
+    **kwargs,
+):
+    """Uses a similarity index, or creates one on the spot, to find leaks.
+
+    Calling this method only creates the index. You can then call the methods
+    exposed on the returned object to perform the following operations:
+
+    -   :meth:`leaks <fiftyone.brain.internal.core.leaky_splits.LeakySplitsIndex.leaks>`:
+        Returns a view of all leaks in the dataset.
+
+    -   :meth:`no_leaks_view <fiftyone.brain.internal.core.leaky_splits.LeakySplitsIndex.no_leaks_view>`:
+        Returns a subset of the given view without any leaks.
+
+    -   :meth:`leaks_for_sample <fiftyone.brain.internal.core.leaky_splits.LeakySplitsIndex.leaks_for_sample>`:
+        Returns a view with leaks corresponding to the given sample.
+
+    -   :meth:`tag_leaks <fiftyone.brain.internal.core.leaky_splits.LeakySplitsIndex.tag_leaks>`:
+        Tags leaks in the dataset as leaks.
+
+    Args:
+        samples: a :class:`fiftyone.core.collections.SampleCollection`
+        brain_key (None): a brain key under which to store the results of this
+            method. If no brain key is provided, the results will not be saved
+        split_views (None): a dict of :class:`fiftyone.core.view.DatasetView`
+            corresponding to different splits in the dataset. Only one of
+            ``split_views``, ``split_field``, and ``split_tags`` needs to be provided
+        split_field (None): the name of a field that holds the split of each
+            sample. Each unique value in the field is treated as a split. Only one of
+            ``split_views``, ``split_field``, and ``split_tags`` needs to be provided
+        split_tags (None): a list of tags corresponding to different splits. Only
+            one of ``split_views``, ``split_field``, and ``split_tags`` needs to be provided
+        threshold (0.2): the threshold to run the algorithm with. Values between
+            0.1 - 0.25 tend to give good results
+        similarity_brain_key (None): a brain key for the similarity index. If the
+            brain key already exists, the similarity index stored under it is loaded;
+            otherwise, a new similarity index is created and saved under this name
+        embeddings_field (None): field for embeddings to feed the index. This argument's
+            behavior depends on whether a ``model`` is provided, as described below.
+            If no ``model`` is provided, this argument specifies the field of pre-computed
+            embeddings to use.
+            If a ``model`` is provided, this argument specifies where to store
+            the model's embeddings
+        model (None): a :class:`fiftyone.core.models.Model` or the name of a
+            model from the
+            `FiftyOne Model Zoo <https://docs.voxel51.com/user_guide/model_zoo/index.html>`_
+            to use, or that was already used, to generate embeddings. The model
+            must expose embeddings (``model.has_embeddings = True``)
+        model_kwargs (None): a dictionary of optional keyword arguments to pass
+            to the model's ``Config`` when a model name is provided
+        similarity_backend (None): the similarity backend to use. The supported
+            values are
+            ``fiftyone.brain.brain_config.similarity_backends.keys()`` and the
+            default is ``fiftyone.brain.brain_config.default_similarity_backend``
+        similarity_config_dict (None): a dict used to build the similarity backend.
+            Explicit arguments take precedence over values in the dict (e.g. ``model``)
+        **kwargs: additional keyword arguments, forwarded unchanged to
+            :func:`fiftyone.brain.internal.core.leaky_splits.compute_leaky_splits`
+
+    Returns:
+        a :class:`fiftyone.brain.internal.core.leaky_splits.LeakySplitsIndex`,
+        a :class:`fiftyone.core.view.DatasetView`
+    """
+
+    # Function-local import; the call below targets this internal implementation
+    from fiftyone.brain.internal.core.leaky_splits import compute_leaky_splits
+
+    return compute_leaky_splits(
+        samples,
+        brain_key=brain_key,
+        split_views=split_views,
+        split_field=split_field,
+        split_tags=split_tags,
+        threshold=threshold,
+        similarity_brain_key=similarity_brain_key,
+        embeddings_field=embeddings_field,
+        model=model,
+        model_kwargs=model_kwargs,
+        similarity_backend=similarity_backend,
+        similarity_config_dict=similarity_config_dict,
+        **kwargs,
+    )