Skip to content

Commit dce73b3

Browse files
authored
Add status tracking (#72)
1 parent 284185d commit dce73b3

35 files changed

+22
-12
lines changed

.github/workflows/ci-checks.yml

+3-4
Original file line numberDiff line numberDiff line change
@@ -12,22 +12,21 @@ concurrency:
1212

1313
jobs:
1414
check-precommit:
15-
uses: Lightning-AI/utilities/.github/workflows/check-precommit.yml@v0.10.1
15+
uses: Lightning-AI/utilities/.github/workflows/check-precommit.yml@main
1616

1717
check-typing:
1818
# TODO: switch to main after fix lands
1919
uses: Lightning-AI/utilities/.github/workflows/check-typing.yml@main
2020
with:
2121
actions-ref: main
22-
source-dir: ""
2322

2423
check-schema:
25-
uses: Lightning-AI/utilities/.github/workflows/check-schema.yml@v0.10.1
24+
uses: Lightning-AI/utilities/.github/workflows/check-schema.yml@main
2625
with:
2726
azure-dir: ""
2827

2928
check-package:
30-
uses: Lightning-AI/utilities/.github/workflows/check-package.yml@v0.10.1
29+
uses: Lightning-AI/utilities/.github/workflows/check-package.yml@main
3130
with:
3231
actions-ref: v0.10.1
3332
import-name: "litdata"

docs/source/conf.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -16,19 +16,22 @@
1616

1717
_PATH_HERE = os.path.abspath(os.path.dirname(__file__))
1818
_PATH_ROOT = os.path.realpath(os.path.join(_PATH_HERE, "..", ".."))
19+
_PATH_SOURCE = os.path.join(_PATH_ROOT, "src")
1920
sys.path.insert(0, os.path.abspath(_PATH_ROOT))
2021

2122
SPHINX_MOCK_REQUIREMENTS = int(os.environ.get("SPHINX_MOCK_REQUIREMENTS", True))
2223

2324
# alternative https://stackoverflow.com/a/67692/4521646
24-
spec = spec_from_file_location("litdata/__about__.py", os.path.join(_PATH_ROOT, "litdata", "__about__.py"))
25+
spec = spec_from_file_location(
26+
"litdata/__about__.py", os.path.join(_PATH_SOURCE, "litdata", "__about__.py")
27+
)
2528
about = module_from_spec(spec)
2629
spec.loader.exec_module(about)
2730

2831
# -- Project information -----------------------------------------------------
2932

3033
# this name shall match the project name in Github as it is used for linking to code
31-
project = "lit-data"
34+
project = "litdata"
3235
copyright = about.__copyright__
3336
author = about.__author__
3437

pyproject.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ lint.ignore-init-module-imports = true
8888
"S501", # Probable use of `requests` call with `verify=False` disabling SSL certificate checks
8989
"S108", # Probable insecure usage of temporary file or directory: "/tmp/data/MNIST"
9090
]
91-
"litdata/**" = [
91+
"src/**" = [
9292
"S101", # todo: Use of `assert` detected
9393
"S105", "S106", "S107", # todo: Possible hardcoded password: ...
9494
"S113", # todo: Probable use of requests call without timeout

setup.py

+6-3
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,15 @@
55
from pathlib import Path
66

77
from pkg_resources import parse_requirements
8-
from setuptools import setup
8+
from setuptools import find_packages, setup
99

1010
_PATH_ROOT = os.path.dirname(__file__)
11-
_PATH_REQUIRES = os.path.join(_PATH_ROOT, "requirements")
11+
_PATH_SOURCE = os.path.join(_PATH_ROOT, "src")
12+
_PATH_REQUIRES = os.path.join(_PATH_ROOT, "_requirements")
1213

1314

1415
def _load_py_module(fname, pkg="litdata"):
15-
spec = spec_from_file_location(os.path.join(pkg, fname), os.path.join(_PATH_ROOT, pkg, fname))
16+
spec = spec_from_file_location(os.path.join(pkg, fname), os.path.join(_PATH_SOURCE, pkg, fname))
1617
py = module_from_spec(spec)
1718
spec.loader.exec_module(py)
1819
return py
@@ -60,6 +61,8 @@ def _prepare_extras(requirements_dir: str = _PATH_REQUIRES, skip_files: tuple =
6061
download_url="https://github.com/Lightning-AI/litdata",
6162
license=about.__license__,
6263
long_description=readme,
64+
packages=find_packages(where="src"),
65+
package_dir={"": "src"},
6366
long_description_content_type="text/markdown",
6467
include_package_data=True,
6568
zip_safe=False,
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.

litdata/processing/data_processor.py renamed to src/litdata/processing/data_processor.py

+7-2
Original file line numberDiff line numberDiff line change
@@ -953,6 +953,9 @@ def run(self, data_recipe: DataRecipe) -> None:
953953
leave=True,
954954
dynamic_ncols=True,
955955
)
956+
num_nodes = _get_num_nodes()
957+
node_rank = _get_node_rank()
958+
total_num_items = len(user_items)
956959

957960
while True:
958961
try:
@@ -973,6 +976,10 @@ def run(self, data_recipe: DataRecipe) -> None:
973976
if current_total == num_items:
974977
break
975978

979+
if _IS_IN_STUDIO and node_rank == 0:
980+
with open("status.json", "w") as f:
981+
json.dump({"progress": str(100 * current_total * num_nodes / total_num_items) + "%"}, f)
982+
976983
# Exit early if all the workers are done.
977984
# This means there was some kind of error.
978985
if all(not w.is_alive() for w in self.workers):
@@ -981,8 +988,6 @@ def run(self, data_recipe: DataRecipe) -> None:
981988

982989
pbar.close()
983990

984-
num_nodes = _get_num_nodes()
985-
node_rank = _get_node_rank()
986991
# TODO: Understand why it hangs.
987992
if num_nodes == 1:
988993
for w in self.workers:
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.

0 commit comments

Comments
 (0)