Skip to content

Commit 4efbeee

Browse files
authored
Improve docs for Exprs and scalar functions (#16036)
* Improve docs for Exprs and scalar functions * fix links
1 parent d6fe1de commit 4efbeee

File tree

2 files changed

+56
-41
lines changed

2 files changed

+56
-41
lines changed

datafusion/expr/src/expr.rs

Lines changed: 32 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -312,35 +312,15 @@ pub enum Expr {
312312
Negative(Box<Expr>),
313313
/// Whether an expression is between a given range.
314314
Between(Between),
315-
/// The CASE expression is similar to a series of nested if/else and there are two forms that
316-
/// can be used. The first form consists of a series of boolean "when" expressions with
317-
/// corresponding "then" expressions, and an optional "else" expression.
318-
///
319-
/// ```text
320-
/// CASE WHEN condition THEN result
321-
/// [WHEN ...]
322-
/// [ELSE result]
323-
/// END
324-
/// ```
325-
///
326-
/// The second form uses a base expression and then a series of "when" clauses that match on a
327-
/// literal value.
328-
///
329-
/// ```text
330-
/// CASE expression
331-
/// WHEN value THEN result
332-
/// [WHEN ...]
333-
/// [ELSE result]
334-
/// END
335-
/// ```
315+
/// A CASE expression (see docs on [`Case`])
336316
Case(Case),
337317
/// Casts the expression to a given type and will return a runtime error if the expression cannot be cast.
338318
/// This expression is guaranteed to have a fixed type.
339319
Cast(Cast),
340320
/// Casts the expression to a given type and will return a null value if the expression cannot be cast.
341321
/// This expression is guaranteed to have a fixed type.
342322
TryCast(TryCast),
343-
/// Represents the call of a scalar function with a set of arguments.
323+
/// Call a scalar function with a set of arguments.
344324
ScalarFunction(ScalarFunction),
345325
/// Calls an aggregate function with arguments, and optional
346326
/// `ORDER BY`, `FILTER`, `DISTINCT` and `NULL TREATMENT`.
@@ -349,7 +329,7 @@ pub enum Expr {
349329
///
350330
/// [`ExprFunctionExt`]: crate::expr_fn::ExprFunctionExt
351331
AggregateFunction(AggregateFunction),
352-
/// Represents the call of a window function with arguments.
332+
/// Call a window function with a set of arguments.
353333
WindowFunction(WindowFunction),
354334
/// Returns whether the list contains the expr value.
355335
InList(InList),
@@ -378,7 +358,7 @@ pub enum Expr {
378358
/// A placeholder for parameters in a prepared statement
379359
/// (e.g. `$foo` or `$1`)
380360
Placeholder(Placeholder),
381-
/// A place holder which hold a reference to a qualified field
361+
/// A placeholder which holds a reference to a qualified field
382362
/// in the outer query, used for correlated subqueries.
383363
OuterReferenceColumn(DataType, Column),
384364
/// Unnest expression
@@ -551,6 +531,28 @@ impl Display for BinaryExpr {
551531
}
552532

553533
/// CASE expression
534+
///
535+
/// The CASE expression is similar to a series of nested if/else and there are two forms that
536+
/// can be used. The first form consists of a series of boolean "when" expressions with
537+
/// corresponding "then" expressions, and an optional "else" expression.
538+
///
539+
/// ```text
540+
/// CASE WHEN condition THEN result
541+
/// [WHEN ...]
542+
/// [ELSE result]
543+
/// END
544+
/// ```
545+
///
546+
/// The second form uses a base expression and then a series of "when" clauses that match on a
547+
/// literal value.
548+
///
549+
/// ```text
550+
/// CASE expression
551+
/// WHEN value THEN result
552+
/// [WHEN ...]
553+
/// [ELSE result]
554+
/// END
555+
/// ```
554556
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Hash)]
555557
pub struct Case {
556558
/// Optional base expression that can be compared to literal values in the "when" expressions
@@ -631,7 +633,9 @@ impl Between {
631633
}
632634
}
633635

634-
/// ScalarFunction expression invokes a built-in scalar function
636+
/// Invoke a [`ScalarUDF`] with a set of arguments
637+
///
638+
/// [`ScalarUDF`]: crate::ScalarUDF
635639
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
636640
pub struct ScalarFunction {
637641
/// The function
@@ -648,7 +652,9 @@ impl ScalarFunction {
648652
}
649653

650654
impl ScalarFunction {
651-
/// Create a new ScalarFunction expression with a user-defined function (UDF)
655+
/// Create a new `ScalarFunction` from a [`ScalarUDF`]
656+
///
657+
/// [`ScalarUDF`]: crate::ScalarUDF
652658
pub fn new_udf(udf: Arc<crate::ScalarUDF>, args: Vec<Expr>) -> Self {
653659
Self { func: udf, args }
654660
}

datafusion/expr/src/udf.rs

Lines changed: 24 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -34,19 +34,19 @@ use std::sync::Arc;
3434
///
3535
/// A scalar function produces a single row output for each row of input. This
3636
/// struct contains the information DataFusion needs to plan and invoke
37-
/// functions you supply such name, type signature, return type, and actual
37+
/// functions you supply such as name, type signature, return type, and actual
3838
/// implementation.
3939
///
4040
/// 1. For simple use cases, use [`create_udf`] (examples in [`simple_udf.rs`]).
4141
///
4242
/// 2. For advanced use cases, use [`ScalarUDFImpl`] which provides full API
4343
/// access (examples in [`advanced_udf.rs`]).
4444
///
45-
/// See [`Self::call`] to invoke a `ScalarUDF` with arguments.
45+
/// See [`Self::call`] to create an `Expr` which invokes a `ScalarUDF` with arguments.
4646
///
4747
/// # API Note
4848
///
49-
/// This is a separate struct from `ScalarUDFImpl` to maintain backwards
49+
/// This is a separate struct from [`ScalarUDFImpl`] to maintain backwards
5050
/// compatibility with the older API.
5151
///
5252
/// [`create_udf`]: crate::expr_fn::create_udf
@@ -568,13 +568,15 @@ pub trait ScalarUDFImpl: Debug + Send + Sync {
568568
}
569569

570570
/// Returns true if some of this `expr`'s subexpressions may not be evaluated
571-
/// and thus any side effects (like divide by zero) may not be encountered
572-
/// Setting this to true prevents certain optimizations such as common subexpression elimination
571+
/// and thus any side effects (like divide by zero) may not be encountered.
572+
///
573+
/// Setting this to true prevents certain optimizations such as common
574+
/// subexpression elimination
573575
fn short_circuits(&self) -> bool {
574576
false
575577
}
576578

577-
/// Computes the output interval for a [`ScalarUDFImpl`], given the input
579+
/// Computes the output [`Interval`] for a [`ScalarUDFImpl`], given the input
578580
/// intervals.
579581
///
580582
/// # Parameters
@@ -590,9 +592,11 @@ pub trait ScalarUDFImpl: Debug + Send + Sync {
590592
Interval::make_unbounded(&DataType::Null)
591593
}
592594

593-
/// Updates bounds for child expressions, given a known interval for this
594-
/// function. This is used to propagate constraints down through an expression
595-
/// tree.
595+
/// Updates bounds for child expressions, given a known [`Interval`] for this
596+
/// function.
597+
///
598+
/// This function is used to propagate constraints down through an
599+
/// expression tree.
596600
///
597601
/// # Parameters
598602
///
@@ -641,20 +645,25 @@ pub trait ScalarUDFImpl: Debug + Send + Sync {
641645
}
642646
}
643647

644-
/// Whether the function preserves lexicographical ordering based on the input ordering
648+
/// Returns true if the function preserves lexicographical ordering based on
649+
/// the input ordering.
650+
///
651+
/// For example, `concat(a, b)` preserves lexicographical ordering, but `abs(a)` does not.
645652
fn preserves_lex_ordering(&self, _inputs: &[ExprProperties]) -> Result<bool> {
646653
Ok(false)
647654
}
648655

649656
/// Coerce arguments of a function call to types that the function can evaluate.
650657
///
651-
/// This function is only called if [`ScalarUDFImpl::signature`] returns [`crate::TypeSignature::UserDefined`]. Most
652-
/// UDFs should return one of the other variants of `TypeSignature` which handle common
653-
/// cases
658+
/// This function is only called if [`ScalarUDFImpl::signature`] returns
659+
/// [`crate::TypeSignature::UserDefined`]. Most UDFs should return one of
660+
/// the other variants of [`TypeSignature`] which handle common cases.
654661
///
655662
/// See the [type coercion module](crate::type_coercion)
656663
/// documentation for more details on type coercion
657664
///
665+
/// [`TypeSignature`]: crate::TypeSignature
666+
///
658667
/// For example, if your function requires a floating point argument, but the user calls
659668
/// it like `my_func(1::int)` (i.e. with `1` as an integer), coerce_types can return `[DataType::Float64]`
660669
/// to ensure the argument is converted to `1::double`
@@ -698,8 +707,8 @@ pub trait ScalarUDFImpl: Debug + Send + Sync {
698707

699708
/// Returns the documentation for this Scalar UDF.
700709
///
701-
/// Documentation can be accessed programmatically as well as
702-
/// generating publicly facing documentation.
710+
/// Documentation can be accessed programmatically as well as used to
711+
/// generate publicly facing documentation.
703712
fn documentation(&self) -> Option<&Documentation> {
704713
None
705714
}

0 commit comments

Comments
 (0)