Skip to content

Commit 317c955

Browse files
dtulga, rlamy, ilongin, skshetry, jonburdo
authored
Initial DataChain Commit (#1)
This adds the DataChain code to this repository, and credits ten additional top contributors from the source iterative/dvcx repository.

---------

Co-authored-by: Ronan Lamy <[email protected]>
Co-authored-by: Ivan Longin <[email protected]>
Co-authored-by: skshetry <[email protected]>
Co-authored-by: Jon Burdo <[email protected]>
Co-authored-by: Dmitry Petrov <[email protected]>
Co-authored-by: Domas Monkus <[email protected]>
Co-authored-by: Vladimir Rudnykh <[email protected]>
Co-authored-by: Dave Berenbaum <[email protected]>
Co-authored-by: Matt Seddon <[email protected]>
Co-authored-by: Ivan Shcheklein <[email protected]>
1 parent 676d83e commit 317c955

File tree

252 files changed

+46750
-0
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

252 files changed

+46750
-0
lines changed

.cruft.json

+23
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
{
2+
"template": "https://github.com/iterative/py-template",
3+
"commit": "867297aa15a0deaf5302edd01a2bc7ab87039627",
4+
"checkout": null,
5+
"context": {
6+
"cookiecutter": {
7+
"project_name": "datachain",
8+
"package_name": "datachain",
9+
"friendly_name": "DataChain",
10+
"author": "Dmitry Petrov",
11+
"email": "[email protected]",
12+
"github_user": "iterative",
13+
"version": "0.0.0",
14+
"copyright_year": "2022",
15+
"license": "Apache-2.0",
16+
"docs": true,
17+
"short_description": "Wrangle unstructured AI data at scale",
18+
"development_status": "Development Status :: 2 - Pre-Alpha",
19+
"_template": "https://github.com/iterative/py-template"
20+
}
21+
},
22+
"directory": null
23+
}

.gitattributes

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
* text=auto eol=lf

.github/ISSUE_TEMPLATE/bug_report.yml

+27
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
name: 🐛 Bug Report
2+
description: Report a bug to help us improve
3+
labels: bug
4+
5+
body:
6+
- type: textarea
7+
id: description
8+
attributes:
9+
label: Description
10+
description:
11+
validations:
12+
required: true
13+
14+
- type: textarea
15+
id: version
16+
attributes:
17+
label: Version Info
18+
description: |
19+
Please run the following command and copy the output below:
20+
21+
```bash
22+
datachain -V; python -V
23+
```
24+
25+
render: Text
26+
validations:
27+
required: false

.github/ISSUE_TEMPLATE/empty_issue.md

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
---
2+
name: Empty Issue
3+
about: A minimal template for ordinary issues or sub-tasks
4+
---
+12
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
name: 💡 Feature Request
2+
description: Suggest a new feature or share ideas
3+
labels: enhancement
4+
5+
body:
6+
- type: textarea
7+
id: description
8+
attributes:
9+
label: Description
10+
description:
11+
validations:
12+
required: true

.github/codecov.yaml

+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
coverage:
2+
status:
3+
project:
4+
default:
5+
# auto compares coverage to the previous base commit
6+
target: auto
7+
# adjust accordingly based on how flaky your tests are
8+
# this allows a 10% drop from the previous base commit coverage
9+
threshold: 10%
10+
# non-blocking status checks
11+
informational: true
12+
13+
flags:
14+
datachain:
15+
paths:
16+
- src/datachain

.github/dependabot.yml

+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
version: 2
2+
3+
updates:
4+
- directory: "/"
5+
package-ecosystem: "pip"
6+
schedule:
7+
interval: "weekly"
8+
labels:
9+
- "maintenance"
10+
11+
- directory: "/"
12+
package-ecosystem: "github-actions"
13+
schedule:
14+
interval: "weekly"
15+
labels:
16+
- "maintenance"

.github/workflows/benchmarks.yml

+34
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
name: Benchmarks
2+
3+
on:
4+
schedule:
5+
- cron: '0 0 * * *'
6+
pull_request:
7+
types: [opened, reopened, labeled, synchronize]
8+
workflow_dispatch: {}
9+
10+
env:
11+
FORCE_COLOR: "1"
12+
13+
jobs:
14+
build:
15+
if: ${{ github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-benchmarks') }}
16+
runs-on: ubuntu-latest
17+
18+
steps:
19+
- uses: actions/checkout@v4
20+
21+
- name: Set up Python 3.10
22+
uses: actions/setup-python@v5
23+
with:
24+
python-version: '3.10'
25+
cache: 'pip'
26+
27+
- name: Upgrade nox and uv
28+
run: |
29+
python -m pip install --upgrade 'nox[uv]'
30+
nox --version
31+
uv --version
32+
33+
- name: Run benchmarks
34+
run: nox -s bench

.github/workflows/release.yml

+40
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
name: Release
2+
3+
on:
4+
release:
5+
types: [published]
6+
workflow_dispatch:
7+
8+
env:
9+
FORCE_COLOR: "1"
10+
11+
jobs:
12+
release:
13+
environment: pypi
14+
permissions:
15+
contents: read
16+
id-token: write
17+
runs-on: ubuntu-latest
18+
steps:
19+
- name: Check out the repository
20+
uses: actions/checkout@v4
21+
with:
22+
fetch-depth: 0
23+
24+
- name: Set up Python 3.10
25+
uses: actions/setup-python@v5
26+
with:
27+
python-version: '3.10'
28+
29+
- name: Upgrade nox and uv
30+
run: |
31+
python -m pip install --upgrade 'nox[uv]'
32+
nox --version
33+
uv --version
34+
35+
- name: Build package
36+
run: nox -s build
37+
38+
- name: Upload package
39+
if: github.event_name == 'release'
40+
uses: pypa/gh-action-pypi-publish@release/v1

.github/workflows/tests.yml

+127
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
name: Tests
2+
3+
on:
4+
push:
5+
branches: [main]
6+
pull_request:
7+
workflow_dispatch:
8+
9+
env:
10+
FORCE_COLOR: "1"
11+
12+
concurrency:
13+
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
14+
cancel-in-progress: true
15+
16+
jobs:
17+
lint:
18+
runs-on: ubuntu-latest
19+
steps:
20+
21+
- name: Check out the repository
22+
uses: actions/checkout@v4
23+
with:
24+
fetch-depth: 0
25+
26+
- name: Set up Python 3.9
27+
uses: actions/setup-python@v5
28+
with:
29+
python-version: '3.9'
30+
cache: 'pip'
31+
32+
- name: Upgrade nox and uv
33+
run: |
34+
python -m pip install --upgrade 'nox[uv]'
35+
nox --version
36+
uv --version
37+
38+
- name: Cache mypy
39+
uses: actions/cache@v4
40+
with:
41+
path: .mypy_cache
42+
key: mypy-${{ runner.os }}-${{ env.pythonLocation }}-${{ hashFiles('pyproject.toml') }}
43+
44+
- name: Cache pre-commit hooks
45+
uses: actions/cache@v4
46+
with:
47+
path: ~/.cache/pre-commit
48+
key: pre-commit-3|${{ env.pythonLocation }}|${{ hashFiles('.pre-commit-config.yaml') }}
49+
50+
- name: Lint code
51+
run: nox -s lint
52+
53+
tests:
54+
timeout-minutes: 25
55+
runs-on: ${{ matrix.os }}
56+
strategy:
57+
fail-fast: false
58+
matrix:
59+
os: [ubuntu-latest-8-cores]
60+
pyv: ['3.9', '3.10', '3.11', '3.12']
61+
include:
62+
- os: macos-latest
63+
pyv: '3.9'
64+
- os: macos-latest
65+
pyv: '3.12'
66+
- os: windows-latest-8-cores
67+
pyv: '3.9'
68+
- os: windows-latest-8-cores
69+
pyv: '3.12'
70+
71+
steps:
72+
73+
# https://github.com/iterative/pytest-servers/pull/122
74+
# https://github.com/abiosoft/colima/issues/468
75+
# https://github.com/abiosoft/colima/blob/main/docs/FAQ.md#cannot-connect-to-the-docker-daemon-at-unixvarrundockersock-is-the-docker-daemon-running
76+
# colima v0.5.6 seems to run more stably than the latest release, which has occasional network failures (ports are not open)
77+
# see: https://github.com/abiosoft/colima/issues/962
78+
- name: Use colima as default docker host on MacOS
79+
if: runner.os == 'macOS'
80+
run: |
81+
brew install docker lima || true # avoid non-zero exit code if brew link fails
82+
sudo curl -L -o /usr/local/bin/colima https://github.com/abiosoft/colima/releases/download/v0.5.6/colima-Darwin-x86_64
83+
sudo chmod +x /usr/local/bin/colima
84+
colima start
85+
sudo ln -vsf "${HOME}"/.colima/default/docker.sock /var/run/docker.sock
86+
env:
87+
HOMEBREW_NO_AUTO_UPDATE: true
88+
HOMEBREW_NO_INSTALL_CLEANUP: true
89+
HOMEBREW_NO_INSTALLED_DEPENDENTS_CHECK: true
90+
HOMEBREW_NO_INSTALL_UPGRADE: true
91+
92+
- name: Check out the repository
93+
uses: actions/checkout@v4
94+
with:
95+
fetch-depth: 0
96+
97+
- name: Set up Python ${{ matrix.pyv }}
98+
uses: actions/setup-python@v5
99+
with:
100+
python-version: ${{ matrix.pyv }}
101+
cache: 'pip'
102+
103+
- name: Upgrade nox and uv
104+
run: |
105+
python -m pip install --upgrade 'nox[uv]'
106+
nox --version
107+
uv --version
108+
109+
- name: Skip flaky azure, gs remotes if unavailable on macos
110+
if: runner.os == 'macOS'
111+
run: echo 'DATACHAIN_TEST_SKIP_MISSING_REMOTES=azure,gs' >> "$GITHUB_ENV"
112+
113+
- name: Run tests
114+
run: nox -s tests-${{ matrix.pyv }}
115+
116+
- name: Upload coverage report
117+
uses: codecov/codecov-action@v4
118+
with:
119+
token: ${{ secrets.CODECOV_TOKEN }}
120+
files: coverage.xml
121+
flags: datachain
122+
123+
- name: Build package
124+
run: nox -s build
125+
126+
- name: Build docs
127+
run: nox -s docs
+19
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
name: Update template
2+
3+
on:
4+
schedule:
5+
- cron: '5 1 * * *' # every day at 01:05
6+
7+
workflow_dispatch:
8+
9+
jobs:
10+
update:
11+
runs-on: ubuntu-latest
12+
steps:
13+
- name: Check out the repository
14+
uses: actions/checkout@v4
15+
16+
- name: Update template
17+
uses: iterative/py-template@main
18+
with:
19+
token: ${{ secrets.UPDATE_TEMPLATE_TOKEN || secrets.GITHUB_TOKEN }}

0 commit comments

Comments
 (0)