Spatial aligned svid lookup #120

Open
wants to merge 44 commits into base: master
Changes from all commits (44 commits)
72c2a75
feat: add bounding box aligned lookup
dlbrittain Aug 7, 2023
d39588f
fix: close connection
dlbrittain Aug 7, 2023
02f7e9c
feat: add api endpoint to trigger workflow
dlbrittain Aug 7, 2023
ddc5705
feat: add cv cache
dlbrittain Aug 7, 2023
54a414f
feat: register workflow in celery worker
dlbrittain Aug 7, 2023
f8f09c7
chore: formatting
dlbrittain Aug 7, 2023
1ba39b8
fix: can use create seg table as task
dlbrittain Aug 7, 2023
7b584c4
fix: don't convert uint64 to floats!
dlbrittain Aug 9, 2023
7943f59
fix: args does not have get
dlbrittain Aug 9, 2023
e255a90
fix: also filter if root ids = 0
dlbrittain Aug 10, 2023
12db925
chore: bump requirements
dlbrittain Nov 20, 2023
3b63a43
feat: use python3.10
dlbrittain Nov 20, 2023
016bcc6
test: test against more python versions
dlbrittain Nov 20, 2023
3bd9aa9
feat: improve supervoxel lookup
dlbrittain Nov 20, 2023
000a340
fix: wrap python versions in quotes (fixes 3.10)
dlbrittain Nov 20, 2023
175f80d
fix(temp): try only python3.10
dlbrittain Nov 20, 2023
9eeaf16
adding cache reset to sv lookup
fcollman Apr 3, 2024
99a872d
bugfix: new create_segmentation_model and annotation_model
fcollman Apr 3, 2024
2639770
fix: resolution normalization
dlbrittain Apr 4, 2024
297d46e
fix: optionally turn off database upload for benchmarking
dlbrittain Apr 16, 2024
51938fc
fix: add upload to database endpoint
dlbrittain Apr 17, 2024
284ac80
fix: use arg parser
dlbrittain Apr 17, 2024
21ce809
fix: set timing to info level
dlbrittain Apr 17, 2024
a1bcfd1
adding more timing information
fcollman Apr 18, 2024
cf4c998
try download point
fcollman Apr 19, 2024
5e75557
adding caching
fcollman Apr 19, 2024
c626b3b
adding corner debug statement
fcollman Apr 19, 2024
c0d8183
improved debugging
fcollman Apr 20, 2024
27b121c
fix float call
fcollman Apr 20, 2024
fdfc535
avoiding cloudvolume error
fcollman Apr 20, 2024
a74db5c
fix watershed
fcollman Apr 20, 2024
b2b0cee
dropping saving chunk pos
fcollman Apr 20, 2024
5180e26
fix mip
fcollman Apr 20, 2024
a968c62
fixing meta call
fcollman Apr 20, 2024
5a71aff
convert to string
fcollman Apr 20, 2024
00e5346
trying more lru_bytes
fcollman Apr 20, 2024
443f9da
fixing log statement
fcollman Apr 20, 2024
d6a8101
upping to a gigabyte
fcollman Apr 20, 2024
30dfddf
adding lru configuration
fcollman Apr 21, 2024
645e03b
changing environment variable name
fcollman Apr 21, 2024
dbac6a4
changing environ variable to int
fcollman Apr 22, 2024
107a359
fix voxel offset
fcollman Apr 22, 2024
1fef6a9
fix: reindex to fix KeyError when index mismatched
dlbrittain Apr 23, 2024
ed0320d
test: add basic df tests
dlbrittain Apr 23, 2024
14 changes: 9 additions & 5 deletions .github/workflows/materialization_ci.yml
@@ -21,6 +21,10 @@ jobs:

name: Test against different Python versions
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.10"]

services:
postgres:
image: postgis/postgis:13-master
@@ -32,11 +36,11 @@
- 5432:5432
options: --health-cmd pg_isready --health-interval 10s --health-timeout 5s --health-retries 5
steps:
- uses: actions/checkout@v4
- name: Set up Python 3.9
uses: actions/setup-python@v5
- uses: actions/checkout@v2
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: 3.9
python-version: ${{ matrix.python-version }}
- uses: actions/cache@v2
with:
path: ~/.cache/pip
Expand All @@ -47,7 +51,7 @@ jobs:
with:
auto-update-conda: true
auto-activate-base: true
python-version: 3.9
python-version: ${{ matrix.python-version }}
- name: Install dependencies
shell: bash -l {0}
run: |
2 changes: 1 addition & 1 deletion Dockerfile
@@ -1,4 +1,4 @@
FROM tiangolo/uwsgi-nginx-flask:python3.9
FROM tiangolo/uwsgi-nginx-flask:python3.10

ENV UWSGI_INI /app/uwsgi.ini
RUN mkdir -p /home/nginx/.cloudvolume/secrets \
2 changes: 1 addition & 1 deletion materializationengine/blueprints/client/query.py
@@ -155,7 +155,7 @@ def _fix_decimal_column(df_col):
if np.all(is_integer_col(df_col)):
return df_col.apply(int)
else:
return df_col.apply(np.float)
return df_col.apply(float)


def get_column(model, column):
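The `np.float` alias was deprecated in NumPy 1.20 and removed in 1.24, so `df_col.apply(np.float)` raises an AttributeError on current NumPy; the builtin `float` is the drop-in replacement. A minimal standalone sketch of the fixed helper (the `is_integer_col` predicate here is a hypothetical stand-in for the one defined in query.py):

```python
import numpy as np
import pandas as pd
from decimal import Decimal


def is_integer_col(df_col: pd.Series) -> pd.Series:
    # Hypothetical stand-in for the is_integer_col helper used in query.py:
    # True where the value survives an int() round-trip unchanged.
    return df_col.apply(lambda v: v == int(v))


def _fix_decimal_column(df_col: pd.Series) -> pd.Series:
    # Postgres NUMERIC values arrive as decimal.Decimal; cast losslessly
    # to int when possible, otherwise to the builtin float
    # (np.float was removed in NumPy 1.24).
    if np.all(is_integer_col(df_col)):
        return df_col.apply(int)
    return df_col.apply(float)


print(_fix_decimal_column(pd.Series([Decimal("1"), Decimal("2")])))    # int64
print(_fix_decimal_column(pd.Series([Decimal("1.5"), Decimal("2")])))  # float64
```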
58 changes: 53 additions & 5 deletions materializationengine/blueprints/materialize/api.py
@@ -1,8 +1,8 @@
import datetime
import logging

import redis
from dynamicannotationdb.models import AnalysisTable, Base
from flask import abort, current_app, request
from flask import abort, current_app, request, jsonify
from flask_accepts import accepts
from flask_restx import Namespace, Resource, inputs, reqparse
from materializationengine.blueprints.reset_auth import reset_auth
@@ -61,6 +61,13 @@
materialize_parser.add_argument("days_to_expire", required=True, default=None, type=int)
materialize_parser.add_argument("merge_tables", required=True, type=inputs.boolean)


spatial_svid_parser = reqparse.RequestParser()
spatial_svid_parser.add_argument("chunk_scale_factor", default=12, type=int)
spatial_svid_parser.add_argument("get_root_ids", default=True, type=inputs.boolean)
spatial_svid_parser.add_argument("upload_to_database", default=True, type=inputs.boolean)


authorizations = {
"apikey": {"type": "apiKey", "in": "query", "name": "middle_auth_token"}
}
@@ -233,11 +240,52 @@ def post(self, datastack_name: str, table_name: str):
).apply_async()
return 200


@mat_bp.expect(spatial_svid_parser)
@mat_bp.route(
"/materialize/run/dense_lookup_root_ids/datastack/<string:datastack_name>"
"/materialize/run/spatial_lookup/datastack/<string:datastack_name>/<string:table_name>"
)
class LookupDenseMissingRootIdsResource(Resource):
class SpatialSVIDLookupTableResource(Resource):
@reset_auth
@auth_requires_permission("edit", table_arg="datastack_name")
@mat_bp.doc("Lookup spatially chunked svid workflow", security="apikey")
def post(self, datastack_name: str, table_name: str):
"""Process newly added annotations and lookup segmentation data using
a spatially chunked svid lookup strategy. Optionally also lookups root ids.

Args:
datastack_name (str): name of datastack from infoservice
table_name (str): name of table
"""
from materializationengine.workflows.spatial_lookup import (
run_spatial_lookup_workflow,
)
args = spatial_svid_parser.parse_args()


if datastack_name not in current_app.config["DATASTACKS"]:
abort(404, f"datastack {datastack_name} not configured for materialization")

datastack_info = get_datastack_info(datastack_name)

chunk_scale_factor = args["chunk_scale_factor"]
get_root_ids = args["get_root_ids"]
upload_to_database = args["upload_to_database"]
try:
run_spatial_lookup_workflow.si(
datastack_info,
table_name=table_name,
chunk_scale_factor=chunk_scale_factor,
get_root_ids=get_root_ids,
upload_to_database=upload_to_database,
).apply_async()
except Exception as e:
logging.error(e)
return abort(400, f"Error running spatial lookup workflow: {e}")
return 200


@mat_bp.route("/materialize/run/lookup_root_ids/datastack/<string:datastack_name>")
class LookupMissingRootIdsResource(Resource):
@reset_auth
@auth_requires_admin
@mat_bp.doc("Find all null root ids and lookup new roots", security="apikey")
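For reference, the new workflow can then be kicked off with a single authenticated POST; the query parameters mirror the `spatial_svid_parser` arguments and defaults above, and the apikey token is read from the query string per the declared security scheme. A hedged sketch using requests (the host and datastack/table names are placeholders, and the path is shown relative to wherever the materialize blueprint is mounted):

```python
import requests

HOST = "https://materialize.example.com"  # hypothetical deployment
url = (
    f"{HOST}/materialize/run/spatial_lookup/"
    "datastack/my_datastack/my_table"  # placeholder names
)

resp = requests.post(
    url,
    params={
        "middle_auth_token": "MY_TOKEN",  # apikey auth via query string
        "chunk_scale_factor": 12,         # parser default
        "get_root_ids": True,
        "upload_to_database": True,
    },
)
print(resp.status_code)  # 200 once the Celery workflow has been queued
```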
1 change: 1 addition & 0 deletions materializationengine/celery_init.py
@@ -11,6 +11,7 @@
"materializationengine.workflows.dummy_workflow",
"materializationengine.workflows.periodic_database_removal",
"materializationengine.workflows.periodic_materialization",
"materializationengine.workflows.spatial_lookup",
"materializationengine.shared_tasks",
"materializationengine.views",
"materializationengine.monitor",
61 changes: 61 additions & 0 deletions materializationengine/cloudvolume_gateway.py
@@ -0,0 +1,61 @@
import cloudvolume
import os


class CloudVolumeGateway:
"""A class to manage cloudvolume clients and cache them for reuse."""

def __init__(self, lru_bytes: int = 0):
self._cv_clients = {}
self._lru_bytes = lru_bytes

def get_cv(self, seg_source: str, mip_level: int = 0) -> cloudvolume.CloudVolume:
"""A function to get a cloudvolume client for a given source.

Args:
seg_source (str): The cloudvolume source string.
mip_level (int, optional): The MIP level to use. Defaults to 0.

Returns:
cloudvolume.CloudVolume: The cloudvolume client.
"""
seg_source_key = seg_source.split("/")[-1]
return (
self._get_cv_client(seg_source, seg_source_key, mip_level)
if seg_source_key not in self._cv_clients
else self._cv_clients[seg_source_key]
)

def _get_cv_client(
self, seg_source: str, seg_source_key: str, mip_level: int = 0
) -> cloudvolume.CloudVolume:
"""A helper function to create a cloudvolume client and cache it for reuse.

Args:
seg_source (str): The cloudvolume source string.
seg_source_key (str): The cloudvolume source key name to use for caching.
mip_level (int, optional): The MIP level to use. Defaults to 0.

Returns:
cloudvolume.CloudVolume: The newly created and cached cloudvolume client.
"""
cv_client = cloudvolume.CloudVolume(
seg_source,
mip=mip_level,
use_https=True,
bounded=False,
fill_missing=True,
lru_bytes=self._lru_bytes,
)

self._cv_clients[seg_source_key] = cv_client
return self._cv_clients[seg_source_key]

def invalidate_cache(self):
"""Clear the cache of cloudvolume clients."""
self._cv_clients = {}


cloudvolume_cache = CloudVolumeGateway(
lru_bytes=int(os.environ.get("CELERY_CLOUDVOLUME_CACHE_BYTES", 0))
)
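The module exposes a ready-made singleton, so workers just import `cloudvolume_cache` and ask for a client; repeated requests for the same source reuse the cached instance. A small usage sketch (the segmentation path is a made-up example):

```python
from materializationengine.cloudvolume_gateway import cloudvolume_cache

# Hypothetical source string; any cloudvolume-compatible path works.
SEG_SOURCE = "precomputed://gs://example-bucket/segmentation/seg_v1"

cv = cloudvolume_cache.get_cv(SEG_SOURCE, mip_level=0)
cv_again = cloudvolume_cache.get_cv(SEG_SOURCE, mip_level=2)

# Clients are keyed only on the last path component ("seg_v1"), so the
# second call returns the cached mip-0 client instead of opening mip 2.
assert cv is cv_again

cloudvolume_cache.invalidate_cache()  # drop all cached clients
```

Note that because the cache key ignores `mip_level`, callers needing several MIP levels from one source would have to invalidate the cache between calls (or key on MIP as well).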
14 changes: 9 additions & 5 deletions materializationengine/workflows/ingest_new_annotations.py
@@ -655,8 +655,8 @@ def ingest_new_annotations_workflow(mat_metadata: dict):
return fin.si()


@celery.task(name="workflow:create_missing_segmentation_table")
def create_missing_segmentation_table(mat_metadata: dict) -> dict:
@celery.task(name="workflow:create_missing_segmentation_table", acks_late=True)
def create_missing_segmentation_table(mat_metadata: dict) -> bool:
"""Create missing segmentation tables associated with an annotation table if it
does not already exist.

@@ -666,7 +666,7 @@ def create_missing_segmentation_table(mat_metadata: dict) -> dict:
Materialization metadata

Returns:
dict: Materialization metadata
bool: True if the segmentation table was created or already exists
"""
segmentation_table_name = mat_metadata.get("segmentation_table_name")
aligned_volume = mat_metadata.get("aligned_volume")
@@ -698,6 +698,8 @@ def create_missing_segmentation_table(mat_metadata: dict) -> dict:
except Exception as e:
celery_logger.error(f"SQL ERROR: {e}")
session.rollback()
finally:
session.close()
else:
session.close()
return True
@@ -993,8 +995,10 @@ def get_new_root_ids(materialization_data: dict, mat_metadata: dict) -> dict:

cg_client = chunkedgraph_cache.init_pcg(pcg_table_name)

# filter missing root_ids and lookup root_ids if missing
mask = np.logical_and.reduce([root_ids_df[col].isna() for col in cols])
# filter missing root_ids and lookup root_ids if missing or zero
mask = np.logical_and.reduce(
[(root_ids_df[col].isna() | (root_ids_df[col] == 0)) for col in cols]
)
missing_root_rows = root_ids_df.loc[mask]
if not missing_root_rows.empty:
supervoxel_data = missing_root_rows.loc[:, supervoxel_col_names]
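The widened mask treats a root id of 0 like a missing value, so annotations that previously slipped through with zero roots are picked up for re-lookup. A standalone sketch of the mask logic on toy data (column names are illustrative):

```python
import numpy as np
import pandas as pd

root_ids_df = pd.DataFrame({
    "pre_pt_root_id": [10, 0, np.nan, 42],
    "post_pt_root_id": [11, 0, np.nan, 0],
})
cols = ["pre_pt_root_id", "post_pt_root_id"]

# A row needs a fresh lookup only when *every* root id column is
# missing or zero (np.logical_and.reduce over the per-column masks).
mask = np.logical_and.reduce(
    [(root_ids_df[col].isna() | (root_ids_df[col] == 0)) for col in cols]
)
missing_root_rows = root_ids_df.loc[mask]
print(missing_root_rows.index.tolist())  # [1, 2]
```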