@@ -114,8 +114,11 @@ def _add_author_agents(authors, doc, entity, reg_uri_prefix, vocab_namespaces):
114
114
entity ,
115
115
author_agent ,
116
116
None ,
117
- {PROV_ROLE : QualifiedName (
118
- vocab_namespaces [DCTERMS_VOCAB_PREFIX ], 'creator' )},
117
+ {
118
+ PROV_ROLE : QualifiedName (
119
+ vocab_namespaces [DCTERMS_VOCAB_PREFIX ], 'creator'
120
+ )
121
+ },
119
122
)
120
123
121
124
@@ -148,7 +151,7 @@ def _add_code_repo_release(
148
151
(
149
152
(
150
153
PROV_TYPE ,
151
- QualifiedName (vocab_namespaces [DCMITYPE_VOCAB_PREFIX ], 'Software' )
154
+ QualifiedName (vocab_namespaces [DCMITYPE_VOCAB_PREFIX ], 'Software' ),
152
155
),
153
156
* _generate_object_meta (code_repo , vocab_namespaces ),
154
157
(
@@ -352,7 +355,6 @@ def _add_linked_files(
352
355
cr_activity ,
353
356
doc ,
354
357
dp_entity ,
355
- dp_id ,
356
358
input_objects ,
357
359
object_components ,
358
360
reg_uri_prefix ,
@@ -364,24 +366,29 @@ def _add_linked_files(
364
366
@param cr_activity: a prov.activity representing the code run
365
367
@param doc: a ProvDocument that the entities will belong to
366
368
@param dp_entity: a prov.entity representing the data_product
367
- @param dp_id: the data_product id
368
369
@param input_objects: boolean, 'True' if the object_components represent input
369
370
objects
370
371
@param object_components: a list of object_components from the ObjectComponent table
371
372
@param reg_uri_prefix: a str containing the name of the prefix
372
373
@param vocab_namespaces: a dict containing the Namespaces for the vocab
373
374
375
+ @return a list of data products that were added
376
+
374
377
"""
375
378
for component in object_components :
376
379
obj = component .object
377
380
data_products = obj .data_products .all ()
378
381
379
382
for data_product in data_products :
380
- if not input_objects and data_product .id == dp_id :
381
- # we have already added the original data product
383
+ file_id = f'{ reg_uri_prefix } :api/data_product/{ data_product .id } '
384
+
385
+ entity = doc .get_record (file_id )
386
+ # check to see if we have already created an entity for this data product
387
+ if len (entity ) > 0 :
388
+ # The prov documentation says a ProvRecord is returned, but actually a
389
+ # list of ProvRecord is returned
382
390
continue
383
391
384
- file_id = f'{ reg_uri_prefix } :api/data_product/{ data_product .id } '
385
392
file_entity = doc .entity (
386
393
file_id ,
387
394
(
@@ -421,6 +428,8 @@ def _add_linked_files(
421
428
# add the link to the code run
422
429
doc .wasGeneratedBy (file_entity , cr_activity )
423
430
431
+ return data_products
432
+
424
433
425
434
def _add_model_config (cr_activity , doc , model_config , reg_uri_prefix , vocab_namespaces ):
426
435
"""
@@ -458,6 +467,48 @@ def _add_model_config(cr_activity, doc, model_config, reg_uri_prefix, vocab_name
458
467
)
459
468
460
469
470
def _add_prime_data_product(doc, data_product, reg_uri_prefix, vocab_namespaces):
    """
    Add the prime data product for this level of the provenance report.

    If the document already holds a record for this data product, that record is
    returned and nothing new is added to the document.

    @param doc: a ProvDocument that the entities will belong to
    @param data_product: The DataProduct to generate the PROV document for
    @param reg_uri_prefix: a str containing the name of the prefix
    @param vocab_namespaces: a dict containing the Namespaces for the vocab

    @return the data product entity

    """
    # build the identifier once so the lookup and the new entity cannot disagree
    data_product_id = f'{reg_uri_prefix}:api/data_product/{data_product.id}'

    # check to see if we have already created an entity for this data product.
    # The prov documentation says a ProvRecord is returned, but actually a list of
    # ProvRecord is returned; the truthiness test treats an empty list and None alike
    existing_records = doc.get_record(data_product_id)
    if existing_records:
        return existing_records[0]

    # add the data product
    dp_entity = doc.entity(
        data_product_id,
        (
            (PROV_TYPE, QualifiedName(vocab_namespaces[DCAT_VOCAB_PREFIX], 'Dataset')),
            *_generate_object_meta(data_product.object, vocab_namespaces),
        ),
    )

    _add_author_agents(
        data_product.object.authors.all(),
        doc,
        dp_entity,
        reg_uri_prefix,
        vocab_namespaces,
    )

    _add_external_object(doc, data_product, dp_entity, reg_uri_prefix, vocab_namespaces)

    return dp_entity
510
+
511
+
461
512
def _add_submission_script (
462
513
cr_activity , doc , submission_script , reg_uri_prefix , vocab_namespaces
463
514
):
@@ -476,9 +527,10 @@ def _add_submission_script(
476
527
(
477
528
(
478
529
PROV_TYPE ,
479
- QualifiedName (vocab_namespaces [DCMITYPE_VOCAB_PREFIX ], 'Software' )
530
+ QualifiedName (vocab_namespaces [DCMITYPE_VOCAB_PREFIX ], 'Software' ),
480
531
),
481
- * _generate_object_meta (submission_script , vocab_namespaces ),),
532
+ * _generate_object_meta (submission_script , vocab_namespaces ),
533
+ ),
482
534
)
483
535
484
536
_add_author_agents (
@@ -501,69 +553,26 @@ def _add_submission_script(
501
553
)
502
554
503
555
504
- def get_whole_object_component (components ):
505
- for component in components :
506
- if component .whole_object :
507
- return component
508
-
509
-
510
- def generate_prov_document (data_product , request ):
556
+ def _generate_prov_document (doc , data_product , reg_uri_prefix , vocab_namespaces ):
511
557
"""
512
- Generate a PROV document for a DataProduct detailing all the input and outputs and
513
- how they were generated.
558
+ Add the next level to the provenance doc.
514
559
515
- This uses the W3C PROV ontology (https://www.w3.org/TR/prov-o/).
560
+ This takes a data product and generates its provenance. A list of input files
561
+ are returned so they can be used to create the next level of the provenance if
562
+ needed.
516
563
517
- :param data_product: The DataProduct to generate the PROV document for
564
+ @param doc: a ProvDocument that the entities will belong to
565
+ @param data_product: The DataProduct to generate the PROV document for
566
+ @param reg_uri_prefix: a str containing the name of the prefix
567
+ @param vocab_namespaces: a dict containing the Namespaces for the vocab
518
568
519
- : return: A PROV-O document
569
+ @ return a list of files that were used as input files, may be empty
520
570
521
571
"""
522
- url = request .build_absolute_uri ('/' )
523
- cenral_registry_url = settings .CENTRAL_REGISTRY_URL
524
- if not cenral_registry_url .endswith ('/' ):
525
- cenral_registry_url = f'{ cenral_registry_url } /'
526
-
527
- doc = prov .model .ProvDocument ()
528
-
529
- if url == cenral_registry_url :
530
- # we are using the main registry
531
- reg_uri_prefix = 'reg'
532
- doc .add_namespace (reg_uri_prefix , cenral_registry_url )
533
- else :
534
- # we are using a local registry
535
- reg_uri_prefix = 'lreg'
536
- doc .add_namespace (reg_uri_prefix , url )
537
-
538
- # the vocab namespace is always the main registry
539
- doc .add_namespace (FAIR_VOCAB_PREFIX , f'{ cenral_registry_url } vocab/#' )
540
- # we need to tell SONAR to ignore 'http' in the vocab URLs
541
- doc .add_namespace (DCAT_VOCAB_PREFIX , 'http://www.w3.org/ns/dcat#' ) # NOSONAR
542
- doc .add_namespace (DCMITYPE_VOCAB_PREFIX , 'http://purl.org/dc/dcmitype/' ) # NOSONAR
543
- doc .add_namespace (DCTERMS_VOCAB_PREFIX , 'http://purl.org/dc/terms/' ) # NOSONAR
544
- doc .add_namespace (FOAF_VOCAB_PREFIX , 'http://xmlns.com/foaf/spec/#' ) # NOSONAR
545
-
546
- vocab_namespaces = {}
547
- for namespace in doc .get_registered_namespaces ():
548
- vocab_namespaces [namespace .prefix ] = namespace
549
-
550
- # add the data product
551
- dp_entity = doc .entity (
552
- f'{ reg_uri_prefix } :api/data_product/{ data_product .id } ' ,
553
- (
554
- (PROV_TYPE , QualifiedName (vocab_namespaces [DCAT_VOCAB_PREFIX ], 'Dataset' )),
555
- * _generate_object_meta (data_product .object , vocab_namespaces ),
556
- ),
557
- )
558
-
559
- _add_author_agents (
560
- data_product .object .authors .all (),
561
- doc ,
562
- dp_entity ,
563
- reg_uri_prefix ,
564
- vocab_namespaces ,
572
+ # add the root data product
573
+ dp_entity = _add_prime_data_product (
574
+ doc , data_product , reg_uri_prefix , vocab_namespaces
565
575
)
566
- _add_external_object (doc , data_product , dp_entity , reg_uri_prefix , vocab_namespaces )
567
576
568
577
# add the activity, i.e. the code run
569
578
components = data_product .object .components .all ()
@@ -572,11 +581,12 @@ def generate_prov_document(data_product, request):
572
581
code_run = whole_object .outputs_of .all ()[0 ]
573
582
except IndexError :
574
583
# there is no code run so we cannot add any more provenance data
575
- return doc
584
+ return []
576
585
577
586
# add the code run, this is the central activity
578
587
cr_activity = _add_code_run (
579
- dp_entity , doc , code_run , reg_uri_prefix , vocab_namespaces )
588
+ dp_entity , doc , code_run , reg_uri_prefix , vocab_namespaces
589
+ )
580
590
581
591
# add the code repo release
582
592
if code_run .code_repo is not None :
@@ -596,11 +606,10 @@ def generate_prov_document(data_product, request):
596
606
)
597
607
598
608
# add input files
599
- _add_linked_files (
609
+ input_files = _add_linked_files (
600
610
cr_activity ,
601
611
doc ,
602
612
dp_entity ,
603
- None ,
604
613
True ,
605
614
code_run .inputs .all (),
606
615
reg_uri_prefix ,
@@ -612,19 +621,89 @@ def generate_prov_document(data_product, request):
612
621
cr_activity ,
613
622
doc ,
614
623
dp_entity ,
615
- data_product .id ,
616
624
False ,
617
625
code_run .outputs .all (),
618
626
reg_uri_prefix ,
619
627
vocab_namespaces ,
620
628
)
621
629
630
+ return input_files
631
+
632
+
633
def get_whole_object_component(components):
    """
    Find the first component that is flagged as the whole object.

    @param components: an iterable of object components

    @return the first component whose whole_object attribute is truthy, or None if
        there is no such component

    """
    whole_object_components = (
        candidate for candidate in components if candidate.whole_object
    )
    return next(whole_object_components, None)
637
+
638
+
639
def generate_prov_document(data_product, depth, request):
    """
    Generate a PROV document for a DataProduct detailing all the input and outputs and
    how they were generated.

    This uses the W3C PROV ontology (https://www.w3.org/TR/prov-o/).

    The given data product is always expanded. While more levels are requested, the
    input files returned by the previous level are expanded in turn, so a depth of 1
    (or less) produces a single level. Each data product is expanded at most once,
    and the walk stops early once a level yields no input files.

    :param data_product: The DataProduct to generate the PROV document for
    :param depth: The depth for the document. How many levels of code runs to include.
    :param request: A request object

    :return: A PROV-O document

    """
    url = request.build_absolute_uri('/')
    central_registry_url = settings.CENTRAL_REGISTRY_URL
    if not central_registry_url.endswith('/'):
        central_registry_url = f'{central_registry_url}/'

    doc = prov.model.ProvDocument()

    if url == central_registry_url:
        # we are using the main registry
        reg_uri_prefix = 'reg'
        doc.add_namespace(reg_uri_prefix, central_registry_url)
    else:
        # we are using a local registry
        reg_uri_prefix = 'lreg'
        doc.add_namespace(reg_uri_prefix, url)

    # the vocab namespace is always the main registry
    doc.add_namespace(FAIR_VOCAB_PREFIX, f'{central_registry_url}vocab/#')
    # we need to tell SONAR to ignore 'http' in the vocab URLs
    doc.add_namespace(DCAT_VOCAB_PREFIX, 'http://www.w3.org/ns/dcat#')  # NOSONAR
    doc.add_namespace(DCMITYPE_VOCAB_PREFIX, 'http://purl.org/dc/dcmitype/')  # NOSONAR
    doc.add_namespace(DCTERMS_VOCAB_PREFIX, 'http://purl.org/dc/terms/')  # NOSONAR
    doc.add_namespace(FOAF_VOCAB_PREFIX, 'http://xmlns.com/foaf/spec/#')  # NOSONAR

    vocab_namespaces = {
        namespace.prefix: namespace for namespace in doc.get_registered_namespaces()
    }

    # get the initial set of input files
    input_files = _generate_prov_document(
        doc, data_product, reg_uri_prefix, vocab_namespaces
    )

    # remember which data products have been expanded so that one reachable through
    # several code runs does not add the same records to the document again
    expanded_ids = {data_product.id}

    # add extra layers to the report if requested by the user; there is nothing
    # left to do once a level produces no input files
    remaining_levels = depth - 1
    while remaining_levels > 0 and input_files:
        next_level_input_files = []

        for input_file in input_files:
            if input_file.id in expanded_ids:
                continue
            expanded_ids.add(input_file.id)

            next_level_input_files.extend(
                _generate_prov_document(
                    doc, input_file, reg_uri_prefix, vocab_namespaces
                )
            )

        # reset the input files for the next level
        input_files = next_level_input_files
        remaining_levels -= 1

    return doc
623
704
624
705
625
- def serialize_prov_document (
626
- doc , format_ , aspect_ratio , dpi = None , show_attributes = True
627
- ):
706
+ def serialize_prov_document (doc , format_ , aspect_ratio , dpi = None , show_attributes = True ):
628
707
"""
629
708
Serialise a PROV document as either a JPEG or SVG image or an XML or PROV-N report.
630
709
0 commit comments