Skip to content

Commit

Permalink
doc-gen: migrate scalar functions (array) documentation 1/3 (#13928)
Browse files Browse the repository at this point in the history
* doc-gen: migrate scalar functions (array) documentation 1/3

* fix: remove unused import, fix typo and update function docs

---------

Co-authored-by: Cheng-Yuan-Lai <a186235@gmail.com>
  • Loading branch information
Chen-Yuan-Lai and Cheng-Yuan-Lai authored Dec 29, 2024
1 parent a47729c commit 6a92870
Show file tree
Hide file tree
Showing 11 changed files with 446 additions and 605 deletions.
2 changes: 2 additions & 0 deletions datafusion-cli/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions datafusion/functions-nested/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,12 @@ arrow-buffer = { workspace = true }
arrow-ord = { workspace = true }
arrow-schema = { workspace = true }
datafusion-common = { workspace = true }
datafusion-doc = { workspace = true }
datafusion-execution = { workspace = true }
datafusion-expr = { workspace = true }
datafusion-functions = { workspace = true }
datafusion-functions-aggregate = { workspace = true }
datafusion-macros = { workspace = true }
datafusion-physical-expr-common = { workspace = true }
itertools = { workspace = true, features = ["use_std"] }
log = { workspace = true }
Expand Down
160 changes: 67 additions & 93 deletions datafusion/functions-nested/src/array_has.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,17 +25,17 @@ use arrow_buffer::BooleanBuffer;
use datafusion_common::cast::as_generic_list_array;
use datafusion_common::utils::string_utils::string_array_to_vec;
use datafusion_common::{exec_err, Result, ScalarValue};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_ARRAY;
use datafusion_expr::{
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
};
use datafusion_macros::user_doc;
use datafusion_physical_expr_common::datum::compare_with_eq;
use itertools::Itertools;

use crate::utils::make_scalar_function;

use std::any::Any;
use std::sync::{Arc, OnceLock};
use std::sync::Arc;

// Create static instances of ScalarUDFs for each function
make_udf_expr_and_func!(ArrayHas,
Expand All @@ -57,6 +57,27 @@ make_udf_expr_and_func!(ArrayHasAny,
array_has_any_udf // internal function name
);

#[user_doc(
doc_section(label = "Array Functions"),
description = "Returns true if the array contains the element.",
syntax_example = "array_has(array, element)",
sql_example = r#"```sql
> select array_has([1, 2, 3], 2);
+-----------------------------+
| array_has(List([1,2,3]), 2) |
+-----------------------------+
| true |
+-----------------------------+
```"#,
argument(
name = "array",
description = "Array expression. Can be a constant, column, or function, and any combination of array operators."
),
argument(
name = "element",
description = "Scalar or Array expression. Can be a constant, column, or function, and any combination of array operators."
)
)]
#[derive(Debug)]
pub struct ArrayHas {
signature: Signature,
Expand Down Expand Up @@ -138,41 +159,10 @@ impl ScalarUDFImpl for ArrayHas {
}

fn documentation(&self) -> Option<&Documentation> {
Some(get_array_has_doc())
self.doc()
}
}

static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

fn get_array_has_doc() -> &'static Documentation {
DOCUMENTATION.get_or_init(|| {
Documentation::builder(
DOC_SECTION_ARRAY,
"Returns true if the array contains the element.",

"array_has(array, element)")
.with_sql_example(
r#"```sql
> select array_has([1, 2, 3], 2);
+-----------------------------+
| array_has(List([1,2,3]), 2) |
+-----------------------------+
| true |
+-----------------------------+
```"#,
)
.with_argument(
"array",
"Array expression. Can be a constant, column, or function, and any combination of array operators.",
)
.with_argument(
"element",
"Scalar or Array expression. Can be a constant, column, or function, and any combination of array operators.",
)
.build()
})
}

fn array_has_inner_for_scalar(
haystack: &ArrayRef,
needle: &dyn Datum,
Expand Down Expand Up @@ -287,6 +277,27 @@ fn array_has_any_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
}
}

#[user_doc(
doc_section(label = "Array Functions"),
description = "Returns true if all elements of sub-array exist in array.",
syntax_example = "array_has_all(array, sub-array)",
sql_example = r#"```sql
> select array_has_all([1, 2, 3, 4], [2, 3]);
+--------------------------------------------+
| array_has_all(List([1,2,3,4]), List([2,3])) |
+--------------------------------------------+
| true |
+--------------------------------------------+
```"#,
argument(
name = "array",
description = "Array expression. Can be a constant, column, or function, and any combination of array operators."
),
argument(
name = "sub-array",
description = "Array expression. Can be a constant, column, or function, and any combination of array operators."
)
)]
#[derive(Debug)]
pub struct ArrayHasAll {
signature: Signature,
Expand Down Expand Up @@ -337,39 +348,31 @@ impl ScalarUDFImpl for ArrayHasAll {
}

fn documentation(&self) -> Option<&Documentation> {
Some(get_array_has_all_doc())
self.doc()
}
}

fn get_array_has_all_doc() -> &'static Documentation {
DOCUMENTATION.get_or_init(|| {
Documentation::builder(
DOC_SECTION_ARRAY,
"Returns true if all elements of sub-array exist in array.",

"array_has_all(array, sub-array)")
.with_sql_example(
r#"```sql
> select array_has_all([1, 2, 3, 4], [2, 3]);
+--------------------------------------------+
| array_has_all(List([1,2,3,4]), List([2,3])) |
+--------------------------------------------+
| true |
+--------------------------------------------+
#[user_doc(
doc_section(label = "Array Functions"),
description = "Returns true if any elements exist in both arrays.",
syntax_example = "array_has_any(array, sub-array)",
sql_example = r#"```sql
> select array_has_any([1, 2, 3], [3, 4]);
+------------------------------------------+
| array_has_any(List([1,2,3]), List([3,4])) |
+------------------------------------------+
| true |
+------------------------------------------+
```"#,
)
.with_argument(
"array",
"Array expression. Can be a constant, column, or function, and any combination of array operators.",
)
.with_argument(
"sub-array",
"Array expression. Can be a constant, column, or function, and any combination of array operators.",
)
.build()
})
}

argument(
name = "array",
description = "Array expression. Can be a constant, column, or function, and any combination of array operators."
),
argument(
name = "sub-array",
description = "Array expression. Can be a constant, column, or function, and any combination of array operators."
)
)]
#[derive(Debug)]
pub struct ArrayHasAny {
signature: Signature,
Expand Down Expand Up @@ -420,39 +423,10 @@ impl ScalarUDFImpl for ArrayHasAny {
}

fn documentation(&self) -> Option<&Documentation> {
Some(get_array_has_any_doc())
self.doc()
}
}

fn get_array_has_any_doc() -> &'static Documentation {
DOCUMENTATION.get_or_init(|| {
Documentation::builder(
DOC_SECTION_ARRAY,
"Returns true if any elements exist in both arrays.",

"array_has_any(array, sub-array)")
.with_sql_example(
r#"```sql
> select array_has_any([1, 2, 3], [3, 4]);
+------------------------------------------+
| array_has_any(List([1,2,3]), List([3,4])) |
+------------------------------------------+
| true |
+------------------------------------------+
```"#,
)
.with_argument(
"array",
"Array expression. Can be a constant, column, or function, and any combination of array operators.",
)
.with_argument(
"sub-array",
"Array expression. Can be a constant, column, or function, and any combination of array operators.",
)
.build()
})
}

/// Represents the type of comparison for array_has.
#[derive(Debug, PartialEq, Clone, Copy)]
enum ComparisonType {
Expand Down
50 changes: 20 additions & 30 deletions datafusion/functions-nested/src/cardinality.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,13 @@ use arrow_schema::DataType::{FixedSizeList, LargeList, List, Map, UInt64};
use datafusion_common::cast::{as_large_list_array, as_list_array, as_map_array};
use datafusion_common::Result;
use datafusion_common::{exec_err, plan_err};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_ARRAY;
use datafusion_expr::{
ArrayFunctionSignature, ColumnarValue, Documentation, ScalarUDFImpl, Signature,
TypeSignature, Volatility,
};
use datafusion_macros::user_doc;
use std::any::Any;
use std::sync::{Arc, OnceLock};
use std::sync::Arc;

make_udf_expr_and_func!(
Cardinality,
Expand All @@ -57,6 +57,23 @@ impl Cardinality {
}
}

#[user_doc(
doc_section(label = "Array Functions"),
description = "Returns the total number of elements in the array.",
syntax_example = "cardinality(array)",
sql_example = r#"```sql
> select cardinality([[1, 2, 3, 4], [5, 6, 7, 8]]);
+--------------------------------------+
| cardinality(List([1,2,3,4,5,6,7,8])) |
+--------------------------------------+
| 8 |
+--------------------------------------+
```"#,
argument(
name = "array",
description = "Array expression. Can be a constant, column, or function, and any combination of array operators."
)
)]
#[derive(Debug)]
pub(super) struct Cardinality {
signature: Signature,
Expand Down Expand Up @@ -96,37 +113,10 @@ impl ScalarUDFImpl for Cardinality {
}

fn documentation(&self) -> Option<&Documentation> {
Some(get_cardinality_doc())
self.doc()
}
}

static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

/// Returns the cached documentation for the `cardinality` function,
/// building it on the first call.
fn get_cardinality_doc() -> &'static Documentation {
    DOCUMENTATION.get_or_init(build_cardinality_doc)
}

/// Assembles the `cardinality` documentation: section, description, syntax,
/// one SQL example and the single `array` argument.
fn build_cardinality_doc() -> Documentation {
    Documentation::builder(
        DOC_SECTION_ARRAY,
        "Returns the total number of elements in the array.",
        "cardinality(array)",
    )
    .with_sql_example(
        r#"```sql
> select cardinality([[1, 2, 3, 4], [5, 6, 7, 8]]);
+--------------------------------------+
| cardinality(List([1,2,3,4,5,6,7,8])) |
+--------------------------------------+
| 8 |
+--------------------------------------+
```"#,
    )
    .with_argument(
        "array",
        "Array expression. Can be a constant, column, or function, and any combination of array operators.",
    )
    .build()
}

/// Cardinality SQL function
pub fn cardinality_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
if args.len() != 1 {
Expand Down
Loading

0 comments on commit 6a92870

Please sign in to comment.