Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
a1ab4f2
add alembic migration
ioan-alexandra Feb 10, 2026
51708f7
add alembic to pyproject, delete db and adjust migration
ioan-alexandra Feb 10, 2026
bb7523e
Merge branch 'iterorganization:develop' into develop
ioan-alexandra Feb 11, 2026
7f65c43
format
ioan-alexandra Feb 11, 2026
5bad088
lint
ioan-alexandra Feb 11, 2026
7846265
redo autogenerate alembic to add files
ioan-alexandra Feb 13, 2026
3712954
lint
ioan-alexandra Feb 13, 2026
dbc96bc
let Base.metadata handle all models
ioan-alexandra Feb 13, 2026
0d6008d
Merge branch 'develop' of github.com:ioan-alexandra/SimDB into develop
ioan-alexandra Feb 26, 2026
1d5b15b
switch from metadata field to json column
ioan-alexandra Feb 27, 2026
40f93a3
take out limit Header since it's fixed in another PR
ioan-alexandra Feb 27, 2026
6b3f56c
format
ioan-alexandra Feb 27, 2026
35a4cf7
lint
ioan-alexandra Feb 27, 2026
886895c
use sql statements to only update specific field instead of whole json
ioan-alexandra Feb 27, 2026
0f24002
use MutableDict
ioan-alexandra Feb 27, 2026
e28eb77
typing errors
ioan-alexandra Feb 27, 2026
9ffc009
fix tests
ioan-alexandra Feb 27, 2026
8e75576
Merge branch 'iterorganization:develop' into develop
ioan-alexandra Mar 5, 2026
a47eb5c
Merge branch 'iterorganization:develop' into develop
ioan-alexandra Mar 13, 2026
df068cf
check if metadata exists before creating
ioan-alexandra Mar 13, 2026
4cf5461
remove custom serialization and use sqlalchemy
ioan-alexandra Mar 13, 2026
b5249db
formatting
ioan-alexandra Mar 13, 2026
0b62d57
small fixes
ioan-alexandra Mar 13, 2026
15858f1
yannick comments
ioan-alexandra Mar 13, 2026
7850422
format
ioan-alexandra Mar 13, 2026
a83b1c0
linting
ioan-alexandra Mar 13, 2026
3c1eacd
add json serializable
ioan-alexandra Mar 19, 2026
9086488
ruff
ioan-alexandra Mar 19, 2026
4d0ba65
Merge remote-tracking branch 'upstream/develop' into develop
ioan-alexandra Mar 30, 2026
851ec1c
ruff
ioan-alexandra Mar 30, 2026
ee2c33d
typing
ioan-alexandra Mar 30, 2026
803678a
fix ty errors
ioan-alexandra Mar 31, 2026
79bb699
fix tests
ioan-alexandra Mar 31, 2026
fd7ea7f
move from arrays to ranges
ioan-alexandra Mar 31, 2026
391742e
Query ranges using SQL directly
Yannicked Apr 2, 2026
2e9c56c
Add tests for querying
Yannicked Apr 2, 2026
96d254d
Cleanup filtering functions
Yannicked Apr 2, 2026
16d778e
Revert removal of base64 encoded np array decoding
Yannicked Apr 2, 2026
7319aa3
Fix numpy array ingestion
Yannicked Apr 2, 2026
a241d17
Reduce amount of queries
Yannicked Apr 2, 2026
6ca91d8
Ruff
Yannicked Apr 2, 2026
9f774f2
ty
Yannicked Apr 2, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
171 changes: 171 additions & 0 deletions alembic/versions/28bee3aa2429_convert_metadata_to_json_column.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
"""convert_metadata_to_json_column

Revision ID: 28bee3aa2429
Revises: 9e9a4a7cd639
Create Date: 2026-02-26 17:01:30.925750

"""

import json
import pickle
from typing import Any, Sequence, Union

import numpy as np
import sqlalchemy as sa
from sqlalchemy import text
from sqlalchemy.dialects import postgresql

from alembic import op

revision: str = "28bee3aa2429"
down_revision: Union[str, Sequence[str], None] = "9e9a4a7cd639"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def _make_json_serializable(value: Any) -> Any:
"""Recursively convert a value to something JSON-serializable.

Numpy arrays are converted to Range dicts using their min and max values.
"""
if value is None or isinstance(value, (str, bool)):
return value
if isinstance(value, (int, float)) and np.isfinite(value):
return value
if isinstance(value, (list, tuple)):
return _make_json_serializable(np.array(value))
if isinstance(value, dict):
return {str(k): _make_json_serializable(v) for k, v in value.items()}
# Convert numpy arrays to Range format
try:
if isinstance(value, np.ndarray) and value.size > 0:
return {
"min": _make_json_serializable(value.min()),
"max": _make_json_serializable(value.max()),
}
except ImportError:
pass
return str(value)


def upgrade() -> None:
    """Upgrade schema: replace the ``metadata`` table with a JSON column.

    Adds a nullable ``metadata`` JSON(B) column on ``simulations`` (unless it
    already exists), copies each simulation's key/value rows from the old
    ``metadata`` table into it as a single JSON document, then drops the old
    table and its indexes.
    """
    conn = op.get_bind()
    inspector = sa.inspect(conn)

    # Add the column only if it doesn't already exist (e.g. it was created
    # via create_all on a fresh database).
    existing_columns = [col["name"] for col in inspector.get_columns("simulations")]
    if "metadata" not in existing_columns:
        if conn.dialect.name == "postgresql":
            # JSONB on PostgreSQL; generic JSON elsewhere.
            column_type: sa.types.TypeEngine = postgresql.JSONB(astext_type=sa.Text())
        else:
            column_type = sa.JSON()
        op.add_column("simulations", sa.Column("metadata", column_type, nullable=True))

    # Migrate existing data from the metadata table if it still exists.
    if "metadata" in inspector.get_table_names():
        result = conn.execute(text("SELECT DISTINCT sim_id FROM metadata"))
        sim_ids = [row[0] for row in result]

        for sim_id in sim_ids:
            meta_rows = conn.execute(
                text("SELECT element, value FROM metadata WHERE sim_id = :sim_id"),
                {"sim_id": sim_id},
            )

            meta_dict = {}
            for element, value in meta_rows:
                if value is None:
                    meta_dict[element] = None
                    continue
                try:
                    # Values were stored via PickleType, so raw bytes need
                    # unpickling before they can be JSON-encoded.
                    unpickled = (
                        pickle.loads(value)
                        if isinstance(value, (bytes, bytearray, memoryview))
                        else value
                    )
                except Exception:
                    # Preserve unpicklable/corrupt entries as repr text
                    # rather than aborting the whole migration.
                    unpickled = repr(value)
                meta_dict[element] = _make_json_serializable(unpickled)

            # The UPDATE statement is dialect-independent (the former
            # per-dialect branches were byte-identical), so issue it once.
            conn.execute(
                text("UPDATE simulations SET metadata = :metadata WHERE id = :sim_id"),
                {"metadata": json.dumps(meta_dict), "sim_id": sim_id},
            )

        op.drop_index("metadata_index", table_name="metadata")
        op.drop_index(op.f("ix_metadata_sim_id"), table_name="metadata")
        op.drop_table("metadata")


def downgrade() -> None:
    """Downgrade schema.

    Recreates the old ``metadata`` key/value table, copies each simulation's
    JSON ``metadata`` document back into it (one row per key), then drops the
    JSON column from ``simulations``.
    """
    conn = op.get_bind()

    # Recreate metadata table
    op.create_table(
        "metadata",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("sim_id", sa.Integer(), nullable=True),
        sa.Column("element", sa.String(length=250), nullable=False),
        sa.Column("value", sa.PickleType(), nullable=True),
        sa.ForeignKeyConstraint(
            ["sim_id"],
            ["simulations.id"],
        ),
        sa.PrimaryKeyConstraint("id"),
    )
    op.create_index(op.f("ix_metadata_sim_id"), "metadata", ["sim_id"], unique=False)
    # Unique index: one value per (simulation, key) pair.
    op.create_index("metadata_index", "metadata", ["sim_id", "element"], unique=True)

    # Migrate data back from JSON column to metadata table
    if conn.dialect.name == "postgresql":
        # Single server-side INSERT..SELECT: json_each_text explodes each
        # simulation's JSON document into (key, value) rows.
        # NOTE(review): this stores the value as plain text, while the
        # non-postgres branch below stores pickle.dumps(value) and the column
        # is PickleType — the two branches may not round-trip identically;
        # confirm readers of the restored table handle both encodings.
        migration_query = text("""
            INSERT INTO metadata (sim_id, element, value)
            SELECT s.id, kv.key, kv.value::text
            FROM simulations s, json_each_text(s.metadata::json) kv
            WHERE s.metadata IS NOT NULL
        """)
        conn.execute(migration_query)
    else:
        # Generic path (e.g. SQLite): iterate rows in Python and re-insert
        # one metadata row per JSON key.
        result = conn.execute(
            text("SELECT id, metadata FROM simulations WHERE metadata IS NOT NULL")
        )
        for sim_id, metadata_json in result:
            if metadata_json:
                try:
                    meta_dict = json.loads(metadata_json)
                    for element, value in meta_dict.items():
                        # Pickle the value for storage
                        # (protocol 0 = ASCII-compatible pickle stream).
                        pickled_value = pickle.dumps(value, 0)
                        conn.execute(
                            text(
                                "INSERT INTO metadata (sim_id, element, value) "
                                "VALUES (:sim_id, :element, :value)"
                            ),
                            {
                                "sim_id": sim_id,
                                "element": element,
                                "value": pickled_value,
                            },
                        )
                except Exception:
                    # Best-effort: a malformed JSON document (or failed
                    # insert) skips that simulation's metadata rather than
                    # aborting the downgrade. NOTE(review): errors are
                    # swallowed silently — consider logging them.
                    pass

    op.drop_column("simulations", "metadata")
7 changes: 4 additions & 3 deletions src/simdb/cli/commands/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, TypeVar

import click
import numpy

if TYPE_CHECKING:
# Only importing these for type checking and documentation generation in order to
Expand All @@ -28,9 +27,11 @@ def _flatten_dict(values: Dict) -> List[Tuple[str, str]]:

def _format_meta_value(meta_value: Any, max_len: int) -> str:
"""
Format the meta value as a string, limiting array values to max_len.
Format the meta value as a string, limiting list values to max_len.
"""
if isinstance(meta_value, (list, numpy.ndarray)):
if isinstance(meta_value, dict) and "min" in meta_value and "max" in meta_value:
return f"[{meta_value['min']}, {meta_value['max']}]"
if isinstance(meta_value, list):
values = []
for i, v in enumerate(meta_value):
values.append(f"{v:.2f}")
Expand Down
10 changes: 4 additions & 6 deletions src/simdb/cli/remote_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -797,11 +797,9 @@ def push_simulation(
):
continue
sim_file = next(
f
for f in sim_data["inputs"]
if f.get("uuid") == file.uuid # type: ignore[union-attr]
f for f in sim_data["inputs"] if f.get("uuid") == file.uuid
)
sim_file["uri"] = f"file:{path}" # type: ignore[invalid-assignment]
sim_file["uri"] = f"file:{path}"
self._push_file(
path,
file.uuid,
Expand Down Expand Up @@ -859,12 +857,12 @@ def push_simulation(
(
f
for f in sim_data["outputs"]
if f.get("uuid") == file.uuid # type: ignore[union-attr]
if f.get("uuid") == file.uuid
),
None,
)
if sim_file:
sim_file["uri"] = f"file:{path}" # type: ignore[invalid-assignment]
sim_file["uri"] = f"file:{path}"
self._push_file(
path,
file.uuid,
Expand Down
Loading
Loading