3
3
"""
4
4
5
5
from collections import namedtuple
6
- import datetime
6
+ import datetime , logging
7
7
from os import environ
8
8
from copy import copy
9
9
from typing import Iterable , Optional
14
14
from graph_tool import Graph
15
15
16
16
from pychunkedgraph .graph import types
17
- from pychunkedgraph .graph .chunks import utils as chunk_utils
17
+ from pychunkedgraph .graph .chunks .utils import (
18
+ get_bounding_children_chunks ,
19
+ get_chunk_ids_from_coords ,
20
+ )
18
21
from pychunkedgraph .graph .utils import basetypes
19
22
20
23
from ..utils import basetypes
@@ -235,7 +238,9 @@ def get_latest_edges(
235
238
These parents would be the new identities for the stale `partner`.
236
239
"""
237
240
_nodes = np .unique (stale_edges [:, 1 ])
238
- nodes_ts_map = dict (zip (_nodes , cg .get_node_timestamps (_nodes , return_numpy = False )))
241
+ nodes_ts_map = dict (
242
+ zip (_nodes , cg .get_node_timestamps (_nodes , return_numpy = False , normalize = True ))
243
+ )
239
244
_nodes = np .unique (stale_edges )
240
245
layers , coords = cg .get_chunk_layers_and_coordinates (_nodes )
241
246
layers_d = dict (zip (_nodes , layers ))
@@ -252,31 +257,55 @@ def _get_normalized_coords(node_a, node_b) -> tuple:
252
257
coord_a , coord_b = cg .get_chunk_coordinates_multiple ([chunk_a , chunk_b ])
253
258
return max_layer , coord_a , coord_b
254
259
255
- def _get_l2chunkids_along_boundary (max_layer , coord_a , coord_b ):
260
+ def _get_l2chunkids_along_boundary (mlayer : int , coord_a , coord_b , padding : int = 0 ):
261
+ """
262
+ Gets L2 Chunk IDs along opposing faces for larger chunks.
263
+ If padding is non-zero, that many extra faces of L2 chunks are added on each side of the boundary.
264
+ This is necessary to find fake edges that can span more than 2 L2 chunks.
265
+ """
256
266
direction = coord_a - coord_b
257
- axis = np .flatnonzero (direction )
258
- assert len (axis ) == 1 , f"{ direction } , { coord_a } , { coord_b } "
259
- axis = axis [0 ]
260
- children_a = chunk_utils .get_bounding_children_chunks (
261
- cg .meta , max_layer , coord_a , children_layer = 2
262
- )
263
- children_b = chunk_utils .get_bounding_children_chunks (
264
- cg .meta , max_layer , coord_b , children_layer = 2
265
- )
266
- if direction [axis ] > 0 :
267
- mid = coord_a [axis ] * 2 ** (max_layer - 2 )
268
- l2chunks_a = children_a [children_a [:, axis ] == mid ]
269
- l2chunks_b = children_b [children_b [:, axis ] == mid - 1 ]
270
- else :
271
- mid = coord_b [axis ] * 2 ** (max_layer - 2 )
272
- l2chunks_a = children_a [children_a [:, axis ] == mid - 1 ]
273
- l2chunks_b = children_b [children_b [:, axis ] == mid ]
267
+ major_axis = np .argmax (np .abs (direction ))
268
+ bounds_a = get_bounding_children_chunks (cg .meta , mlayer , tuple (coord_a ), 2 )
269
+ bounds_b = get_bounding_children_chunks (cg .meta , mlayer , tuple (coord_b ), 2 )
270
+
271
+ l2chunk_count = 2 ** (mlayer - 2 )
272
+ max_coord = coord_a if direction [major_axis ] > 0 else coord_b
273
+
274
+ skip = abs (direction [major_axis ]) - 1
275
+ l2_skip = skip * l2chunk_count
274
276
275
- l2chunk_ids_a = chunk_utils .get_chunk_ids_from_coords (cg .meta , 2 , l2chunks_a )
276
- l2chunk_ids_b = chunk_utils .get_chunk_ids_from_coords (cg .meta , 2 , l2chunks_b )
277
+ mid = max_coord [major_axis ] * l2chunk_count
278
+ face_a = mid if direction [major_axis ] > 0 else (mid - l2_skip - 1 )
279
+ face_b = mid if direction [major_axis ] < 0 else (mid - l2_skip - 1 )
280
+
281
+ l2chunks_a = [bounds_a [bounds_a [:, major_axis ] == face_a ]]
282
+ l2chunks_b = [bounds_b [bounds_b [:, major_axis ] == face_b ]]
283
+
284
+ step_a , step_b = (1 , - 1 ) if direction [major_axis ] > 0 else (- 1 , 1 )
285
+ for _ in range (padding ):
286
+ _l2_chunks_a = copy (l2chunks_a [- 1 ])
287
+ _l2_chunks_b = copy (l2chunks_b [- 1 ])
288
+ _l2_chunks_a [:, major_axis ] += step_a
289
+ _l2_chunks_b [:, major_axis ] += step_b
290
+ l2chunks_a .append (_l2_chunks_a )
291
+ l2chunks_b .append (_l2_chunks_b )
292
+
293
+ l2chunks_a = np .concatenate (l2chunks_a )
294
+ l2chunks_b = np .concatenate (l2chunks_b )
295
+
296
+ l2chunk_ids_a = get_chunk_ids_from_coords (cg .meta , 2 , l2chunks_a )
297
+ l2chunk_ids_b = get_chunk_ids_from_coords (cg .meta , 2 , l2chunks_b )
277
298
return l2chunk_ids_a , l2chunk_ids_b
278
299
279
- def _get_filtered_l2ids (node_a , node_b , chunks_map ):
300
+ def _get_filtered_l2ids (node_a , node_b , padding : int ):
301
+ """
302
+ Finds L2 IDs along opposing faces for given nodes.
303
+ Filtering is done by first finding L2 chunks along these faces.
304
+ Then get their parent chunks iteratively.
305
+ Then filter children iteratively using these chunks.
306
+ """
307
+ chunks_map = {}
308
+
280
309
def _filter (node ):
281
310
result = []
282
311
children = np .array ([node ], dtype = basetypes .NODE_ID )
@@ -294,59 +323,67 @@ def _filter(node):
294
323
children = cg .get_children (children [mask ], flatten = True )
295
324
return np .concatenate (result )
296
325
297
- return _filter (node_a ), _filter (node_b )
298
-
299
- result = [types .empty_2d ]
300
- chunks_map = {}
301
- for edge_layer , _edge in zip (edge_layers , stale_edges ):
302
- node_a , node_b = _edge
303
326
mlayer , coord_a , coord_b = _get_normalized_coords (node_a , node_b )
304
- chunks_a , chunks_b = _get_l2chunkids_along_boundary (mlayer , coord_a , coord_b )
327
+ chunks_a , chunks_b = _get_l2chunkids_along_boundary (
328
+ mlayer , coord_a , coord_b , padding
329
+ )
305
330
306
- chunks_map [node_a ] = [np . array ( [cg .get_chunk_id (node_a )]) ]
307
- chunks_map [node_b ] = [np . array ( [cg .get_chunk_id (node_b )]) ]
331
+ chunks_map [node_a ] = [[cg .get_chunk_id (node_a )]]
332
+ chunks_map [node_b ] = [[cg .get_chunk_id (node_b )]]
308
333
_layer = 2
309
334
while _layer < mlayer :
310
335
chunks_map [node_a ].append (chunks_a )
311
336
chunks_map [node_b ].append (chunks_b )
312
337
chunks_a = np .unique (cg .get_parent_chunk_id_multiple (chunks_a ))
313
338
chunks_b = np .unique (cg .get_parent_chunk_id_multiple (chunks_b ))
314
339
_layer += 1
315
- chunks_map [node_a ] = np .concatenate (chunks_map [node_a ]).astype (basetypes .NODE_ID )
316
- chunks_map [node_b ] = np .concatenate (chunks_map [node_b ]).astype (basetypes .NODE_ID )
340
+ chunks_map [node_a ] = np .concatenate (chunks_map [node_a ])
341
+ chunks_map [node_b ] = np .concatenate (chunks_map [node_b ])
342
+ return int (mlayer ), _filter (node_a ), _filter (node_b )
317
343
318
- l2ids_a , l2ids_b = _get_filtered_l2ids (node_a , node_b , chunks_map )
344
+ result = [types .empty_2d ]
345
+ for edge_layer , _edge in zip (edge_layers , stale_edges ):
346
+ node_a , node_b = _edge
347
+ mlayer , l2ids_a , l2ids_b = _get_filtered_l2ids (node_a , node_b , padding = 0 )
348
+ if l2ids_a .size == 0 or l2ids_b .size == 0 :
349
+ logging .info (f"{ node_a } , { node_b } , expanding search with padding." )
350
+ mlayer , l2ids_a , l2ids_b = _get_filtered_l2ids (node_a , node_b , padding = 2 )
351
+ logging .info (f"Found { l2ids_a } and { l2ids_b } " )
352
+
353
+ _edges = []
319
354
edges_d = cg .get_cross_chunk_edges (
320
355
node_ids = l2ids_a , time_stamp = nodes_ts_map [node_b ], raw_only = True
321
356
)
322
-
323
- _edges = []
324
357
for v in edges_d .values ():
325
358
_edges .append (v .get (edge_layer , types .empty_2d ))
326
- _edges = np .concatenate (_edges )
327
- mask = np .isin (_edges [:, 1 ], l2ids_b )
328
359
329
- children_b = cg .get_children (_edges [mask ][:, 1 ], flatten = True )
360
+ try :
361
+ _edges = np .concatenate (_edges )
362
+ except ValueError as exc :
363
+ logging .warning (f"No edges found for { node_a } , { node_b } " )
364
+ raise ValueError from exc
330
365
366
+ mask = np .isin (_edges [:, 1 ], l2ids_b )
331
367
parents_a = _edges [mask ][:, 0 ]
368
+ children_b = cg .get_children (_edges [mask ][:, 1 ], flatten = True )
332
369
parents_b = np .unique (cg .get_parents (children_b , time_stamp = parent_ts ))
333
- _cx_edges_d = cg .get_cross_chunk_edges (parents_b )
370
+ _cx_edges_d = cg .get_cross_chunk_edges (parents_b , time_stamp = parent_ts )
334
371
parents_b = []
335
372
for _node , _edges_d in _cx_edges_d .items ():
336
373
for _edges in _edges_d .values ():
337
- _mask = np .isin (_edges [:,1 ], parents_a )
374
+ _mask = np .isin (_edges [:, 1 ], parents_a )
338
375
if np .any (_mask ):
339
376
parents_b .append (_node )
340
377
341
378
parents_b = np .array (parents_b , dtype = basetypes .NODE_ID )
342
379
parents_b = np .unique (
343
- cg .get_roots (
344
- parents_b , stop_layer = mlayer , ceil = False , time_stamp = parent_ts
345
- )
380
+ cg .get_roots (parents_b , stop_layer = mlayer , ceil = False , time_stamp = parent_ts )
346
381
)
347
382
348
383
parents_a = np .array ([node_a ] * parents_b .size , dtype = basetypes .NODE_ID )
349
- result .append (np .column_stack ((parents_a , parents_b )))
384
+ _new_edges = np .column_stack ((parents_a , parents_b ))
385
+ assert _new_edges .size , f"No edge found for { node_a } , { node_b } at { parent_ts } "
386
+ result .append (_new_edges )
350
387
return np .concatenate (result )
351
388
352
389
0 commit comments