Skip to content

Commit f89f200

Browse files
authored
Set up basic CI, added tests, and implemented the mode function. (#2)
* Set up basic CI, added tests, and implemented the mode function.
1 parent 6355b9d commit f89f200

24 files changed

+3832
-16
lines changed

.editorconfig

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
#
2+
# Licensed to the Apache Software Foundation (ASF) under one or more
3+
# contributor license agreements. See the NOTICE file distributed with
4+
# this work for additional information regarding copyright ownership.
5+
# The ASF licenses this file to You under the Apache License, Version 2.0
6+
# (the "License"); you may not use this file except in compliance with
7+
# the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
#
17+
18+
root = true
19+
20+
[*]
21+
charset = utf-8
22+
end_of_line = lf
23+
insert_final_newline = true
24+
25+
[*.rs]
26+
indent_style = space
27+
indent_size = 4

.github/workflows/ci.yml

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
name: CI
19+
20+
on:
21+
push:
22+
branches:
23+
- main
24+
tags:
25+
- '**'
26+
pull_request: {}
27+
28+
jobs:
29+
lint:
30+
runs-on: ubuntu-latest
31+
steps:
32+
- uses: actions/checkout@v3
33+
34+
- name: Install Rust toolchain components
35+
uses: dtolnay/rust-toolchain@stable
36+
with:
37+
components: rustfmt, clippy
38+
39+
- id: cache-rust
40+
uses: Swatinem/rust-cache@v2
41+
42+
- name: Run pre-commit lint checks
43+
uses: pre-commit/action@v3.0.0
44+
with:
45+
extra_args: --all-files --verbose
46+
env:
47+
PRE_COMMIT_COLOR: always
48+
SKIP: test
49+
50+
test:
51+
name: Test with Rust-${{ matrix.rust-version }}
52+
strategy:
53+
fail-fast: false
54+
matrix:
55+
rust-version: [stable, nightly]
56+
57+
runs-on: ubuntu-latest
58+
steps:
59+
- uses: actions/checkout@v3
60+
61+
- name: Set up Rust toolchain
62+
uses: dtolnay/rust-toolchain@master
63+
with:
64+
toolchain: ${{ matrix.rust-version }}
65+
66+
- id: cache-rust
67+
uses: Swatinem/rust-cache@v2
68+
69+
- name: Run cargo tests
70+
run: cargo test --all-features

.gitignore

Lines changed: 54 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,59 @@
1-
# Generated by Cargo
2-
# will have compiled files and executables
3-
debug/
4-
target/
1+
# License
2+
# Licensed to the Apache Software Foundation (ASF) under one
3+
# or more contributor license agreements. See the NOTICE file
4+
# distributed with this work for additional information
5+
# regarding copyright ownership. The ASF licenses this file
6+
# to you under the Apache License, Version 2.0 (the
7+
# "License"); you may not use this file except in compliance
8+
# with the License. You may obtain a copy of the License at
9+
#
10+
# http://www.apache.org/licenses/LICENSE-2.0
11+
#
12+
# Unless required by applicable law or agreed to in writing,
13+
# software distributed under the License is distributed on an
14+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
# KIND, either express or implied. See the License for the
16+
# specific language governing permissions and limitations
17+
# under the License.
518

6-
# Remove Cargo.lock from gitignore if creating an executable, leave it for libraries
7-
# More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html
19+
# Linux perf sample data
20+
perf.data
21+
perf.data.old
22+
23+
# IDE and editor directories
24+
.vscode
25+
.idea/
26+
.pytest_cache/
27+
28+
# Python virtual environments
29+
venv/*
30+
31+
# macOS
32+
.DS_Store
33+
34+
# Docker volumes used for caching
35+
.docker
36+
37+
# Rust build output
38+
target
839
Cargo.lock
940

10-
# These are backup files generated by rustfmt
11-
**/*.rs.bk
41+
# Backup files
42+
*.orig
43+
.gdb_history
44+
.history
45+
46+
# Swap files
47+
.*.swp
48+
.*.swo
49+
50+
# Rust-specific tools
51+
rusty-tags.vi
1252

13-
# MSVC Windows builds of rustc generate these, which store debugging information
14-
*.pdb
53+
# Apache release artifacts
54+
dev/dist
1555

16-
# RustRover
17-
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
18-
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
19-
# and can be added to the global gitignore or merged into this file. For a more nuclear
20-
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
21-
#.idea/
56+
# Apache RAT report files
57+
filtered_rat.txt
58+
rat.txt
59+
.githubchangeloggenerator.cache*

.pre-commit-config.yaml

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
fail_fast: true
19+
20+
repos:
21+
- repo: https://github.com/pre-commit/pre-commit-hooks
22+
rev: v4.0.1
23+
hooks:
24+
- id: check-yaml
25+
- id: check-toml
26+
- id: end-of-file-fixer
27+
- id: trailing-whitespace
28+
- id: check-added-large-files
29+
30+
- repo: local
31+
hooks:
32+
- id: format
33+
name: Format
34+
entry: cargo fmt
35+
types: [rust]
36+
language: system
37+
pass_filenames: false
38+
- id: clippy
39+
name: Clippy
40+
entry: cargo clippy -- -D warnings
41+
types: [rust]
42+
language: system
43+
pass_filenames: false
44+
- id: test
45+
name: Test
46+
entry: cargo test
47+
types: [rust]
48+
language: system
49+
pass_filenames: false

.rustfmt.toml

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
max_width = 120

Cargo.toml

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
[package]
19+
name = "datafusion-functions-extra"
20+
version = "0.1.0"
21+
edition = "2021"
22+
description = "Extra Functions for DataFusion"
23+
readme = "README.md"
24+
license = "Apache-2.0"
25+
keywords = ["datafusion", "functions-extra", "aggregations"]
26+
repository = "https://github.com/datafusion-contrib/datafusion-functions-extra/"
27+
rust-version = "1.76"
28+
29+
[lib]
30+
name = "datafusion_functions_extra"
31+
path = "src/lib.rs"
32+
33+
[dependencies]
34+
ahash = { version = "0.8", default-features = false, features = [
35+
"runtime-rng",
36+
] }
37+
datafusion = "42"
38+
hashbrown = { version = "0.14.5", features = ["raw"] }
39+
log = "^0.4"
40+
paste = "1"
41+
42+
[dev-dependencies]
43+
arrow = { version = "53.0.0", features = ["test_utils"] }
44+
criterion = { version = "0.5", features = ["async_tokio"] }
45+
insta = { version = "1.40.0", features = ["yaml"] }
46+
tokio = { version = "1.36", features = ["full"] }
47+
48+
[lints.clippy]
49+
dbg_macro = "deny"
50+
print_stdout = "deny"
51+
52+
[[bench]]
53+
name = "mode"
54+
harness = false

README.md

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
# datafusion-functions-extra
2+
3+
[![CI](https://github.com/datafusion-contrib/datafusion-functions-extra/actions/workflows/ci.yml/badge.svg?event=push)](https://github.com/datafusion-contrib/datafusion-functions-extra/actions/workflows/ci.yml?query=branch%3Amain)
4+
<!-- [![Crates.io](https://img.shields.io/crates/v/datafusion-functions-extra?color=green)](https://crates.io/crates/datafusion-functions-extra) -->
5+
6+
**Note:** This is not an official Apache Software Foundation release.
7+
8+
This crate provides extra functions for DataFusion, specifically focusing on advanced aggregations. These extensions are inspired by other projects like DuckDB and Spark SQL.
9+
10+
To use these functions, you'll just need to call:
11+
12+
```rust
13+
datafusion_functions_extra::register_all_extra_functions(&mut ctx)?;
14+
```
15+
16+
# Examples
17+
18+
```sql
19+
-- Create a table with various columns containing strings, integers, floats, dates, and times
20+
CREATE TABLE test_table (
21+
utf8_col VARCHAR,
22+
int64_col INT,
23+
float64_col FLOAT,
24+
date64_col DATE,
25+
time64_col TIME
26+
) AS VALUES
27+
('apple', 1, 1.0, '2021-01-01', '01:00:00'),
28+
('banana', 2, 2.0, '2021-01-02', '02:00:00'),
29+
('apple', 2, 2.0, '2021-01-02', '02:00:00'),
30+
('orange', 3, 3.0, '2021-01-03', '03:00:00'),
31+
('banana', 3, 3.0, '2021-01-03', '03:00:00'),
32+
('apple', 3, 3.0, '2021-01-03', '03:00:00');
33+
34+
-- Get the mode of the utf8_col column
35+
SELECT mode(utf8_col) as mode_utf8 FROM test_table;
36+
-- Results in
37+
-- +----------+
38+
-- | mode_utf8|
39+
-- +----------+
40+
-- | apple    |
41+
-- +----------+
42+
43+
-- Get the mode of the date64_col column
44+
SELECT mode(date64_col) as mode_date FROM test_table;
45+
-- Results in
46+
-- +-----------+
47+
-- | mode_date |
48+
-- +-----------+
49+
-- | 2021-01-03|
50+
-- +-----------+
51+
52+
-- Get the mode of the time64_col column
53+
SELECT mode(time64_col) as mode_time FROM test_table;
54+
-- Results in
55+
-- +-----------+
56+
-- | mode_time |
57+
-- +-----------+
58+
-- | 03:00:00  |
59+
-- +-----------+
60+
```
61+
62+
## Done
63+
64+
* [x] `mode(expression) -> scalar` - Returns the most frequent (mode) value from a column of data.

0 commit comments

Comments
 (0)