 import tensorstore as ts
 import zstandard as zstd
 from graph_tool import Graph
+from cachetools import LRUCache

 from pychunkedgraph.graph import types
 from pychunkedgraph.graph.chunks.utils import (
 from pychunkedgraph.graph.utils import basetypes

 from ..utils import basetypes
+from ..utils.generic import get_parents_at_timestamp


 _edge_type_fileds = ("in_chunk", "between_chunk", "cross_chunk")
     ]
 )
 ZSTD_EDGE_COMPRESSION = 17
+PARENTS_CACHE = LRUCache(256 * 1024)
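+# maps supervoxel ID -> {timestamp: parent ID}; avoids repeated parent-history lookups in the helpers below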


 class Edges:
@@ -341,7 +344,72 @@ def _filter(node):
         chunks_map[node_b] = np.concatenate(chunks_map[node_b])
         return int(mlayer), _filter(node_a), _filter(node_b)

-    def _get_new_edge(edge, parent_ts, padding):
+    def _populate_parents_cache(children: np.ndarray):
+        global PARENTS_CACHE
+
+        not_cached = []
+        for child in children:
+            try:
+                # reset lru index, these will be needed soon
+                _ = PARENTS_CACHE[child]
+            except KeyError:
+                not_cached.append(child)
+
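+        # fetch parent history only for supervoxels that are not cached yet; store it as {timestamp: parent}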
+        all_parents = cg.get_parents(not_cached, current=False)
+        for child, parents in zip(not_cached, all_parents):
+            PARENTS_CACHE[child] = {}
+            for parent, ts in parents:
+                PARENTS_CACHE[child][ts] = parent
+
+    def _get_parents_b(edges, parent_ts, layer):
+        """
+        Attempts to find new partner-side nodes.
+        Gets new partners using supervoxels at `parent_ts`.
+        Searches for new partners that may have any edges to `edges[:, 0]`.
+        """
+        children_b = cg.get_children(edges[:, 1], flatten=True)
+        _populate_parents_cache(children_b)
+        _parents_b, missing = get_parents_at_timestamp(
+            children_b, PARENTS_CACHE, time_stamp=parent_ts, unique=True
+        )
+        # handle cache miss cases
+        _parents_b_missing = np.unique(cg.get_parents(missing, time_stamp=parent_ts))
+        parents_b = np.concatenate([_parents_b, _parents_b_missing])
+
+        parents_a = edges[:, 0]
+        stale_a = get_stale_nodes(cg, parents_a, parent_ts=parent_ts)
+        if stale_a.size == parents_a.size:
+            # this is applicable only for v2 to v3 migration
+            # handle cases when source nodes in `edges[:, 0]` are stale
+            atomic_edges_d = cg.get_atomic_cross_edges(stale_a)
+            partners = [types.empty_1d]
+            for _edges_d in atomic_edges_d.values():
+                _edges = _edges_d.get(layer, types.empty_2d)
+                partners.append(_edges[:, 1])
+            partners = np.concatenate(partners)
+            return np.unique(cg.get_parents(partners, time_stamp=parent_ts))
+
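+        # keep only candidates whose cross-chunk edges point back to the source nodes in edges[:, 0]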
+        _cx_edges_d = cg.get_cross_chunk_edges(parents_b, time_stamp=parent_ts)
+        _parents_b = []
+        for _node, _edges_d in _cx_edges_d.items():
+            for _edges in _edges_d.values():
+                _mask = np.isin(_edges[:, 1], parents_a)
+                if np.any(_mask):
+                    _parents_b.append(_node)
+        return np.array(_parents_b, dtype=basetypes.NODE_ID)
+
+    def _get_parents_b_with_chunk_mask(
+        l2ids_b: np.ndarray, parents_b: np.ndarray, max_ts: datetime.datetime, edge
+    ):
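+        # the new identities of l2ids_b must lie in the same chunks as the old IDs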
+        chunks_old = cg.get_chunk_ids_from_node_ids(l2ids_b)
+        chunks_new = cg.get_chunk_ids_from_node_ids(parents_b)
+        chunk_mask = np.isin(chunks_new, chunks_old)
+        parents_b = parents_b[chunk_mask]
+        _stale_nodes = get_stale_nodes(cg, parents_b, parent_ts=max_ts)
+        assert _stale_nodes.size == 0, f"{edge}, {_stale_nodes}, {parent_ts}"
+        return parents_b
+
+    def _get_new_edge(edge, edge_layer, parent_ts, padding):
         """
         Attempts to find new edge(s) for the stale `edge`.
         * Find L2 IDs on opposite sides of the face in L2 chunks along the face.
@@ -353,11 +421,11 @@ def _get_new_edge(edge, parent_ts, padding):
         if l2ids_a.size == 0 or l2ids_b.size == 0:
             return types.empty_2d.copy()

-        _edges = []
         max_node_ts = max(nodes_ts_map[node_a], nodes_ts_map[node_b])
         _edges_d = cg.get_cross_chunk_edges(
             node_ids=l2ids_a, time_stamp=max_node_ts, raw_only=True
         )
+        _edges = []
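+        # collect raw cross-chunk edges of l2ids_a at the layer of the stale edge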
         for v in _edges_d.values():
             if edge_layer in v:
                 _edges.append(v[edge_layer])
@@ -369,27 +437,13 @@ def _get_new_edge(edge, parent_ts, padding):

         mask = np.isin(_edges[:, 1], l2ids_b)
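+        # True where an edge partner is still one of the original l2ids_b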
         if np.any(mask):
-            parents_a = _edges[mask][:, 0]
-            children_b = cg.get_children(_edges[mask][:, 1], flatten=True)
-            parents_b = np.unique(cg.get_parents(children_b, time_stamp=parent_ts))
-            _cx_edges_d = cg.get_cross_chunk_edges(parents_b, time_stamp=parent_ts)
-            parents_b = []
-            for _node, _edges_d in _cx_edges_d.items():
-                for _edges in _edges_d.values():
-                    _mask = np.isin(_edges[:, 1], parents_a)
-                    if np.any(_mask):
-                        parents_b.append(_node)
-            parents_b = np.array(parents_b, dtype=basetypes.NODE_ID)
+            parents_b = _get_parents_b(_edges[mask], parent_ts, edge_layer)
         else:
             # if none of `l2ids_b` were found in edges, `l2ids_a` already have new edges
             # so get the new identities of `l2ids_b` by using chunk mask
-            parents_b = _edges[:, 1]
-            chunks_old = cg.get_chunk_ids_from_node_ids(l2ids_b)
-            chunks_new = cg.get_chunk_ids_from_node_ids(parents_b)
-            chunk_mask = np.isin(chunks_new, chunks_old)
-            parents_b = parents_b[chunk_mask]
-            _stale_nodes = get_stale_nodes(cg, parents_b, parent_ts=max_node_ts)
-            assert _stale_nodes.size == 0, f"{edge}, {_stale_nodes}, {parent_ts}"
+            parents_b = _get_parents_b_with_chunk_mask(
+                l2ids_b, _edges[:, 1], max_node_ts, edge
+            )

         parents_b = np.unique(
             cg.get_roots(parents_b, stop_layer=mlayer, ceil=False, time_stamp=parent_ts)
@@ -402,7 +456,7 @@ def _get_new_edge(edge, parent_ts, padding):
     for edge_layer, _edge in zip(edge_layers, stale_edges):
         max_chebyshev_distance = int(environ.get("MAX_CHEBYSHEV_DISTANCE", 3))
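+        # widen the search along the chunk face until replacement edges are found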
         for pad in range(0, max_chebyshev_distance):
-            _new_edges = _get_new_edge(_edge, parent_ts, padding=pad)
+            _new_edges = _get_new_edge(_edge, edge_layer, parent_ts, padding=pad)
             if _new_edges.size:
                 break
             logging.info(f"{_edge}, expanding search with padding {pad + 1}.")
@@ -446,7 +500,7 @@ def get_latest_edges_wrapper(
             stale_edge_layers,
             parent_ts=parent_ts,
         )
-        logging.debug(f"{stale_edges} -> {latest_edges}; {parent_ts}")
+        logging.debug(f"{stale_edges} -> {latest_edges[:, 1].tolist()}; {parent_ts}")
         _new_cx_edges.append(latest_edges)
         new_cx_edges_d[layer] = np.concatenate(_new_cx_edges)
         nodes.append(np.unique(new_cx_edges_d[layer]))