From 8f03b719483853d59fe2dc3dedece899033662b3 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Wed, 1 May 2024 12:19:47 -0400 Subject: [PATCH 01/29] First pass.Nothing works yet. Don't even try testing --- src/Plugin/QueueWorker/CsvADOQueueWorker.php | 229 +++++++++++++++++++ 1 file changed, 229 insertions(+) create mode 100644 src/Plugin/QueueWorker/CsvADOQueueWorker.php diff --git a/src/Plugin/QueueWorker/CsvADOQueueWorker.php b/src/Plugin/QueueWorker/CsvADOQueueWorker.php new file mode 100644 index 0000000..ac4d8b1 --- /dev/null +++ b/src/Plugin/QueueWorker/CsvADOQueueWorker.php @@ -0,0 +1,229 @@ +getViaUri('private://')->getDirectoryPath(); + $handler = new StreamHandler($private_path.'/ami/logs/set'.$data->info['set_id'].'.log', Logger::DEBUG); + $handler->setFormatter(new JsonFormatter()); + $log->pushHandler($handler); + // This will add the File logger not replace the DB + // We can not use addLogger because in a single PHP process multiple Queue items might be invoked + // And loggers are additive. Means i can end with a few duplicated entries! + // @TODO: i should inject this into the Containers but i wanted to keep + // it simple for now. + $this->loggerFactory->get('ami_file')->setLoggers([[$log]]); + + /* Data info for an ADO has this structure + $data->info = [ + 'set_id' => The Set id + 'uid' => The User ID that processed the Set + 'set_url' => A direct URL to the set. + 'op_secondary' => applies only to Update/Patch operations. Can be one of 'update','replace','append' + 'ops_safefiles' => Boolean, True if we will not allow files/mappings to be removed/we will keep them warm and safe + 'log_jsonpatch' => If for Update operations we will generate a single PER ADO Log with a full JSON Patch, + 'attempt' => The number of attempts to process. We always start with a 1 + 'zip_file' => Zip File/File Entity + 'csv_file' => The CSV that will generate the ADO queue items. + 'queue_name' => because well ... 
we use Hydroponics too + 'time_submitted' => Timestamp on when the queue was sent. All Entries will share the same + ]; + // This will simply go to an alternate processing on this same Queue Worker + // Just for files. + */ + $adodata = clone $data; + $adodata->info = NULL; + if (!empty($data->info['csv_file']) && !empty($data->info['file_column'])) { + $invalid = []; + // Note. We won't process the nested CSV here. This queue worker only takes a CSV and splits into smaller + // chunks. Basically what the \Drupal\ami\Form\amiSetEntityProcessForm::submitForm already does. + // But the ADO worker itself will (new code) extract a CSV and then again, enqueue back to this so this one can yet again + // split into smaller chuncks and so on. + $info = $this->AmiUtilityService->preprocessAmiSet($data->info['csv_file'], $data, $invalid, FALSE); + foreach ($info as $item) { + // We set current User here since we want to be sure the final owner of + // the object is this and not the user that runs the queue + $adodata->info = [ + 'zip_file' => $data->info['zip_file'] ?? NULL, + 'row' => $item, + 'set_id' => $data->info['set_id'], + 'uid' => $data->info['uid'], + 'status' => $data->info['status'], + 'op_secondary' => $data->info['op_secondary'] ?? NULL, + 'ops_safefiles' => $data->info['ops_safefiles'] ? TRUE: FALSE, + 'log_jsonpatch' => FALSE, + 'set_url' => $data->info['set_url'], + 'attempt' => 1, + 'queue_name' => $data->info['queue_name'], + 'force_file_queue' => $data->info['force_file_queue'], + 'force_file_process' => $data->info['force_file_process'], + 'manyfiles' => $data->info['manyfiles'], + 'ops_skip_onmissing_file' => $data->info['ops_skip_onmissing_file'], + 'ops_forcemanaged_destination_file' => $data->info['ops_forcemanaged_destination_file'], + 'time_submitted' => $data->info['time_submitted'], + ]; + /*$added[] = \Drupal::queue($data->info['queue_name']) + ->createItem($adodata);*/ + $adodata; + } + return; + } + + // Before we do any processing. 
Check if Parent(s) exists? + // If not, re-enqueue: we try twice only. Should we try more? + $parent_nodes = []; + if (isset($data->info['row']['parent']) && is_array($data->info['row']['parent'])) { + $parents = $data->info['row']['parent']; + $parents = array_filter($parents); + foreach($parents as $parent_property => $parent_uuid) { + $parent_uuids = (array) $parent_uuid; + // We should validate each member to be an UUID here (again). Just in case. + $existing = $this->entityTypeManager->getStorage('node')->loadByProperties(['uuid' => $parent_uuids]); + if (count($existing) != count($parent_uuids)) { + $message = $this->t('Sorry, we can not process ADO with @uuid from Set @setid yet, there are missing parents with UUID(s) @parent_uuids. We will retry.',[ + '@uuid' => $data->info['row']['uuid'], + '@setid' => $data->info['set_id'], + '@parent_uuids' => implode(',', $parent_uuids) + ]); + $this->loggerFactory->get('ami_file')->warning($message ,[ + 'setid' => $data->info['set_id'] ?? NULL, + 'time_submitted' => $data->info['time_submitted'] ?? '', + ]); + + // Pushing to the end of the queue. + $data->info['attempt']++; + if ($data->info['attempt'] < 3) { + \Drupal::queue($data->info['queue_name']) + ->createItem($data); + return; + } + else { + $message = $this->t('Sorry, We tried twice to process ADO with @uuid from Set @setid yet, but you have missing parents. Please check your CSV file and make sure parents with an UUID are in your REPO first and that no other parent generated by the set itself is failing',[ + '@uuid' => $data->info['row']['uuid'], + '@setid' => $data->info['set_id'] + ]); + $this->loggerFactory->get('ami_file')->error($message ,[ + 'setid' => $data->info['set_id'] ?? NULL, + 'time_submitted' => $data->info['time_submitted'] ?? '', + ]); + $this->setStatus(amiSetEntity::STATUS_PROCESSING_WITH_ERRORS, $data); + return; + // We could enqueue in a "failed" queue? + // @TODO for 0.6.0: Or better. 
We could keep track of the dependency + // and then afterwards update one and then the other + // Why? Because if we have one object pointing to X + // and the other pointing back the graph is not acyclic + // but we could still via an update operation + // Ingest without the relations both. Then update both once the + // Ingest is ready IF both have IDs. + } + } + else { + // Get the IDs! + foreach($existing as $node) { + $parent_nodes[$parent_property][] = (int) $node->id(); + } + } + } + } + + $processed_metadata = NULL; + + $method = $data->mapping->globalmapping ?? "direct"; + if ($method == 'custom') { + $method = $data->mapping->custommapping_settings->{$data->info['row']['type']}->metadata ?? "direct"; + } + if ($method == 'template') { + $processed_metadata = $this->AmiUtilityService->processMetadataDisplay($data); + if (!$processed_metadata) { + $message = $this->t('Sorry, we can not cast ADO with @uuid into proper Metadata. Check the Metadata Display Template used, your permissions and/or your data ROW in your CSV for set @setid.',[ + '@uuid' => $data->info['row']['uuid'], + '@setid' => $data->info['set_id'] + ]); + $this->loggerFactory->get('ami_file')->error($message ,[ + 'setid' => $data->info['set_id'] ?? NULL, + 'time_submitted' => $data->info['time_submitted'] ?? '', + ]); + $this->setStatus(amiSetEntity::STATUS_PROCESSING_WITH_ERRORS, $data); + return; + } + } + if ($method == "direct") { + if (isset($data->info['row']['data']) && !is_array($data->info['row']['data'])) { + $message = $this->t('Sorry, we can not cast ADO with @uuid directly into proper Metadata. Check your data ROW in your CSV for set @setid for invalid data.',[ + '@uuid' => $data->info['row']['uuid'] ?? "MISSING UUID", + '@setid' => $data->info['set_id'] + ]); + + $this->loggerFactory->get('ami_file')->error($message ,[ + 'setid' => $data->info['set_id'] ?? NULL, + 'time_submitted' => $data->info['time_submitted'] ?? 
'', + ]); + $this->setStatus(amiSetEntity::STATUS_PROCESSING_WITH_ERRORS, $data); + return; + } + elseif (!isset($data->info['row']['data'])) { + $message = $this->t('Sorry, we can not cast an ADO directly into proper Metadata. Check your data ROW in your CSV for set @setid for invalid data.', + [ + '@setid' => $data->info['set_id'], + ]); + $this->loggerFactory->get('ami_file')->error($message ,[ + 'setid' => $data->info['set_id'] ?? NULL, + 'time_submitted' => $data->info['time_submitted'] ?? '', + ]); + $this->setStatus(amiSetEntity::STATUS_PROCESSING_WITH_ERRORS, $data); + return; + } + + $processed_metadata = $this->AmiUtilityService->expandJson($data->info['row']['data']); + $processed_metadata = !empty($processed_metadata) ? json_encode($processed_metadata) : NULL; + $json_error = json_last_error(); + if ($json_error !== JSON_ERROR_NONE || !$processed_metadata) { + $message = $this->t('Sorry, we can not cast ADO with @uuid directly into proper Metadata. Check your data ROW in your CSV for set @setid for invalid JSON data.',[ + '@uuid' => $data->info['row']['uuid'], + '@setid' => $data->info['set_id'] + ]); + $this->loggerFactory->get('ami_file')->error($message ,[ + 'setid' => $data->info['set_id'] ?? NULL, + 'time_submitted' => $data->info['time_submitted'] ?? 
'', + ]); + $this->setStatus(amiSetEntity::STATUS_PROCESSING_WITH_ERRORS, $data); + return; + } + } + } +} From 46b1dfb09d2d64e24e40badcfc4fc946ccc24a1b Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Wed, 1 May 2024 12:19:59 -0400 Subject: [PATCH 02/29] make private protected so we can extend the class --- src/Plugin/QueueWorker/IngestADOQueueWorker.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Plugin/QueueWorker/IngestADOQueueWorker.php b/src/Plugin/QueueWorker/IngestADOQueueWorker.php index ef71bd1..0dd8109 100644 --- a/src/Plugin/QueueWorker/IngestADOQueueWorker.php +++ b/src/Plugin/QueueWorker/IngestADOQueueWorker.php @@ -103,7 +103,7 @@ class IngestADOQueueWorker extends QueueWorkerBase implements ContainerFactoryPl * * @var array */ - private CONST OP_HUMAN = [ + protected CONST OP_HUMAN = [ 'create' => 'created', 'update' => 'updated', 'patch' => 'patched', @@ -1101,7 +1101,7 @@ private function canProcess($data): bool { * @param string $status * @param \stdClass $data */ - private function setStatus(string $status, \stdClass $data) { + protected function setStatus(string $status, \stdClass $data) { try { $set_id = $data->info['set_id']; if (!empty($set_id)) { From 89899b337dfa4bdb343669f7e446f4f14b767e06 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Wed, 1 May 2024 12:20:07 -0400 Subject: [PATCH 03/29] Remove space --- src/Plugin/ImporterAdapter/GoogleSheetImporter.php | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Plugin/ImporterAdapter/GoogleSheetImporter.php b/src/Plugin/ImporterAdapter/GoogleSheetImporter.php index d7f4229..f8627f6 100644 --- a/src/Plugin/ImporterAdapter/GoogleSheetImporter.php +++ b/src/Plugin/ImporterAdapter/GoogleSheetImporter.php @@ -195,7 +195,6 @@ public function getData(array $config, $page = 0, $per_page = 20):array { MessengerInterface::TYPE_ERROR ); return $tabdata; - } } catch (Google_Service_Exception $e) { $this->messenger()->addMessage( From 
1f7b575a2909e6109b7e3e7188bba4d255f7c8fe Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Wed, 1 May 2024 12:20:23 -0400 Subject: [PATCH 04/29] This will be reverted. But i need to test so added code for that --- src/Form/amiSetEntityProcessForm.php | 31 ++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/src/Form/amiSetEntityProcessForm.php b/src/Form/amiSetEntityProcessForm.php index 3513fab..0eaaf32 100644 --- a/src/Form/amiSetEntityProcessForm.php +++ b/src/Form/amiSetEntityProcessForm.php @@ -115,6 +115,10 @@ public function submitForm(array &$form, FormStateInterface $form_state) { } if ($file && $data !== new \stdClass()) { $invalid = []; + + + + $info = $this->AmiUtilityService->preprocessAmiSet($file, $data, $invalid, FALSE); // Means preprocess set if (count($invalid)) { @@ -169,6 +173,33 @@ public function submitForm(array &$form, FormStateInterface $form_state) { $op_secondary = $form_state->getValue(['ops_secondary','ops_secondary_update'], 'update'); $ops_safefiles = $form_state->getValue(['ops_secondary','ops_safefiles'], TRUE); } + $data_csv = clone $data; + + + + // Testing the CSV processor + $data_csv->info = [ + 'zip_file' => $zip_file, + 'csv_file' => $file, + 'set_id' => $this->entity->id(), + 'uid' => $this->currentUser()->id(), + 'status' => $statuses, + 'op_secondary' => $op_secondary, + 'ops_safefiles' => $ops_safefiles ? 
TRUE: FALSE, + 'log_jsonpatch' => FALSE, + 'set_url' => $SetURL, + 'attempt' => 1, + 'queue_name' => $queue_name, + 'force_file_queue' => (bool) $form_state->getValue('force_file_queue', FALSE), + 'force_file_process' => (bool) $form_state->getValue('force_file_process', FALSE), + 'manyfiles' => $manyfiles, + 'ops_skip_onmissing_file' => $ops_skip_onmissing_file, + 'ops_forcemanaged_destination_file' => $ops_forcemanaged_destination_file, + 'time_submitted' => $run_timestamp + ]; + \Drupal::queue('ami_csv_ado') + ->createItem($data_csv); + foreach ($info as $item) { // We set current User here since we want to be sure the final owner of // the object is this and not the user that runs the queue From 94b14211043a40ab0e13853a439c1c061ba8f9dc Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Wed, 1 May 2024 14:13:58 -0400 Subject: [PATCH 05/29] Ok, now this actually enqueues. Next step remove old code --- src/Plugin/QueueWorker/CsvADOQueueWorker.php | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/src/Plugin/QueueWorker/CsvADOQueueWorker.php b/src/Plugin/QueueWorker/CsvADOQueueWorker.php index ac4d8b1..5b49b111 100644 --- a/src/Plugin/QueueWorker/CsvADOQueueWorker.php +++ b/src/Plugin/QueueWorker/CsvADOQueueWorker.php @@ -67,7 +67,8 @@ public function processItem($data) { */ $adodata = clone $data; $adodata->info = NULL; - if (!empty($data->info['csv_file']) && !empty($data->info['file_column'])) { + $added = []; + if (!empty($data->info['csv_file'])) { $invalid = []; // Note. We won't process the nested CSV here. This queue worker only takes a CSV and splits into smaller // chunks. Basically what the \Drupal\ami\Form\amiSetEntityProcessForm::submitForm already does. 
@@ -96,13 +97,22 @@ public function processItem($data) { 'ops_forcemanaged_destination_file' => $data->info['ops_forcemanaged_destination_file'], 'time_submitted' => $data->info['time_submitted'], ]; - /*$added[] = \Drupal::queue($data->info['queue_name']) - ->createItem($adodata);*/ - $adodata; + $added[] = \Drupal::queue($data->info['queue_name']) + ->createItem($adodata); + //$adodata; + } + if (count($added)) { + $message = $this->t('CSV for Set @setid was expanded to ADOs',[ + '@setid' => $data->info['set_id'] + ]); + $this->loggerFactory->get('ami_file')->info($message ,[ + 'setid' => $data->info['set_id'] ?? NULL, + 'time_submitted' => $data->info['time_submitted'] ?? '', + ]); } return; } - + return; // Before we do any processing. Check if Parent(s) exists? // If not, re-enqueue: we try twice only. Should we try more? $parent_nodes = []; From a5993ad3f1623e9879e442f8285d41094e7b9044 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Wed, 1 May 2024 14:26:28 -0400 Subject: [PATCH 06/29] Set the status again on every read/split into ADO queue items CSV --- src/Plugin/QueueWorker/CsvADOQueueWorker.php | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/Plugin/QueueWorker/CsvADOQueueWorker.php b/src/Plugin/QueueWorker/CsvADOQueueWorker.php index 5b49b111..0c15a07 100644 --- a/src/Plugin/QueueWorker/CsvADOQueueWorker.php +++ b/src/Plugin/QueueWorker/CsvADOQueueWorker.php @@ -99,7 +99,6 @@ public function processItem($data) { ]; $added[] = \Drupal::queue($data->info['queue_name']) ->createItem($adodata); - //$adodata; } if (count($added)) { $message = $this->t('CSV for Set @setid was expanded to ADOs',[ @@ -110,6 +109,11 @@ public function processItem($data) { 'time_submitted' => $data->info['time_submitted'] ?? '', ]); } + $processed_set_status = $this->statusStore->get('set_' . $this->entity->id()); + $processed_set_status['processed'] = $processed_set_status['processed'] ?? 
0; + $processed_set_status['errored'] = $processed_set_status['errored'] ?? 0; + $processed_set_status['total'] = $processed_set_status['total'] ?? 0 + count($added); + $this->statusStore->set('set_' . $this->entity->id(), $processed_set_status); return; } return; From 1a6f2abf8ab16f4b27fdff5fab7810d3c0764c41 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Wed, 1 May 2024 14:39:32 -0400 Subject: [PATCH 07/29] Fix set id, use the one from $data and remove old code --- src/Plugin/QueueWorker/CsvADOQueueWorker.php | 152 ++----------------- 1 file changed, 16 insertions(+), 136 deletions(-) diff --git a/src/Plugin/QueueWorker/CsvADOQueueWorker.php b/src/Plugin/QueueWorker/CsvADOQueueWorker.php index 0c15a07..3f207ae 100644 --- a/src/Plugin/QueueWorker/CsvADOQueueWorker.php +++ b/src/Plugin/QueueWorker/CsvADOQueueWorker.php @@ -27,18 +27,18 @@ * * @QueueWorker( * id = "ami_csv_ado", - * title = @Translation("AMI CSV expander and row Enqueuer Queue Worker") + * title = @Translation("AMI CSV Expander and ADO Enqueuer Queue Worker") * ) */ -class CsvADOQueueWorker extends IngestADOQueueWorker { - +class CsvADOQueueWorker extends IngestADOQueueWorker +{ /** * {@inheritdoc} */ public function processItem($data) { $log = new Logger('ami_file'); $private_path = \Drupal::service('stream_wrapper_manager')->getViaUri('private://')->getDirectoryPath(); - $handler = new StreamHandler($private_path.'/ami/logs/set'.$data->info['set_id'].'.log', Logger::DEBUG); + $handler = new StreamHandler($private_path . '/ami/logs/set' . $data->info['set_id'] . '.log', Logger::DEBUG); $handler->setFormatter(new JsonFormatter()); $log->pushHandler($handler); // This will add the File logger not replace the DB @@ -68,6 +68,8 @@ public function processItem($data) { $adodata = clone $data; $adodata->info = NULL; $added = []; + // @TODO discuss with Allison the idea that one could ingest with "AMI set" data but without an actual AMI set? 
+ // That would require, e.g generating a fake $data->info['set_id'] if (!empty($data->info['csv_file'])) { $invalid = []; // Note. We won't process the nested CSV here. This queue worker only takes a CSV and splits into smaller @@ -81,18 +83,18 @@ public function processItem($data) { $adodata->info = [ 'zip_file' => $data->info['zip_file'] ?? NULL, 'row' => $item, - 'set_id' => $data->info['set_id'], + 'set_id' => $data->info['set_id'], 'uid' => $data->info['uid'], 'status' => $data->info['status'], 'op_secondary' => $data->info['op_secondary'] ?? NULL, - 'ops_safefiles' => $data->info['ops_safefiles'] ? TRUE: FALSE, + 'ops_safefiles' => $data->info['ops_safefiles'] ? TRUE : FALSE, 'log_jsonpatch' => FALSE, 'set_url' => $data->info['set_url'], 'attempt' => 1, - 'queue_name' => $data->info['queue_name'], - 'force_file_queue' => $data->info['force_file_queue'], + 'queue_name' => $data->info['queue_name'], + 'force_file_queue' => $data->info['force_file_queue'], 'force_file_process' => $data->info['force_file_process'], - 'manyfiles' => $data->info['manyfiles'], + 'manyfiles' => $data->info['manyfiles'], 'ops_skip_onmissing_file' => $data->info['ops_skip_onmissing_file'], 'ops_forcemanaged_destination_file' => $data->info['ops_forcemanaged_destination_file'], 'time_submitted' => $data->info['time_submitted'], @@ -101,143 +103,21 @@ public function processItem($data) { ->createItem($adodata); } if (count($added)) { - $message = $this->t('CSV for Set @setid was expanded to ADOs',[ + $message = $this->t('CSV for Set @setid was expanded to ADOs', [ '@setid' => $data->info['set_id'] ]); - $this->loggerFactory->get('ami_file')->info($message ,[ + $this->loggerFactory->get('ami_file')->info($message, [ 'setid' => $data->info['set_id'] ?? NULL, 'time_submitted' => $data->info['time_submitted'] ?? '', ]); } - $processed_set_status = $this->statusStore->get('set_' . $this->entity->id()); - $processed_set_status['processed'] = $processed_set_status['processed'] ?? 
0; - $processed_set_status['errored'] = $processed_set_status['errored'] ?? 0; + $processed_set_status = $this->statusStore->get('set_' . $data->info['set_id']); + $processed_set_status['processed'] = $processed_set_status['processed'] ?? 0; + $processed_set_status['errored'] = $processed_set_status['errored'] ?? 0; $processed_set_status['total'] = $processed_set_status['total'] ?? 0 + count($added); $this->statusStore->set('set_' . $this->entity->id(), $processed_set_status); return; } return; - // Before we do any processing. Check if Parent(s) exists? - // If not, re-enqueue: we try twice only. Should we try more? - $parent_nodes = []; - if (isset($data->info['row']['parent']) && is_array($data->info['row']['parent'])) { - $parents = $data->info['row']['parent']; - $parents = array_filter($parents); - foreach($parents as $parent_property => $parent_uuid) { - $parent_uuids = (array) $parent_uuid; - // We should validate each member to be an UUID here (again). Just in case. - $existing = $this->entityTypeManager->getStorage('node')->loadByProperties(['uuid' => $parent_uuids]); - if (count($existing) != count($parent_uuids)) { - $message = $this->t('Sorry, we can not process ADO with @uuid from Set @setid yet, there are missing parents with UUID(s) @parent_uuids. We will retry.',[ - '@uuid' => $data->info['row']['uuid'], - '@setid' => $data->info['set_id'], - '@parent_uuids' => implode(',', $parent_uuids) - ]); - $this->loggerFactory->get('ami_file')->warning($message ,[ - 'setid' => $data->info['set_id'] ?? NULL, - 'time_submitted' => $data->info['time_submitted'] ?? '', - ]); - - // Pushing to the end of the queue. - $data->info['attempt']++; - if ($data->info['attempt'] < 3) { - \Drupal::queue($data->info['queue_name']) - ->createItem($data); - return; - } - else { - $message = $this->t('Sorry, We tried twice to process ADO with @uuid from Set @setid yet, but you have missing parents. 
Please check your CSV file and make sure parents with an UUID are in your REPO first and that no other parent generated by the set itself is failing',[ - '@uuid' => $data->info['row']['uuid'], - '@setid' => $data->info['set_id'] - ]); - $this->loggerFactory->get('ami_file')->error($message ,[ - 'setid' => $data->info['set_id'] ?? NULL, - 'time_submitted' => $data->info['time_submitted'] ?? '', - ]); - $this->setStatus(amiSetEntity::STATUS_PROCESSING_WITH_ERRORS, $data); - return; - // We could enqueue in a "failed" queue? - // @TODO for 0.6.0: Or better. We could keep track of the dependency - // and then afterwards update one and then the other - // Why? Because if we have one object pointing to X - // and the other pointing back the graph is not acyclic - // but we could still via an update operation - // Ingest without the relations both. Then update both once the - // Ingest is ready IF both have IDs. - } - } - else { - // Get the IDs! - foreach($existing as $node) { - $parent_nodes[$parent_property][] = (int) $node->id(); - } - } - } - } - - $processed_metadata = NULL; - - $method = $data->mapping->globalmapping ?? "direct"; - if ($method == 'custom') { - $method = $data->mapping->custommapping_settings->{$data->info['row']['type']}->metadata ?? "direct"; - } - if ($method == 'template') { - $processed_metadata = $this->AmiUtilityService->processMetadataDisplay($data); - if (!$processed_metadata) { - $message = $this->t('Sorry, we can not cast ADO with @uuid into proper Metadata. Check the Metadata Display Template used, your permissions and/or your data ROW in your CSV for set @setid.',[ - '@uuid' => $data->info['row']['uuid'], - '@setid' => $data->info['set_id'] - ]); - $this->loggerFactory->get('ami_file')->error($message ,[ - 'setid' => $data->info['set_id'] ?? NULL, - 'time_submitted' => $data->info['time_submitted'] ?? 
'', - ]); - $this->setStatus(amiSetEntity::STATUS_PROCESSING_WITH_ERRORS, $data); - return; - } - } - if ($method == "direct") { - if (isset($data->info['row']['data']) && !is_array($data->info['row']['data'])) { - $message = $this->t('Sorry, we can not cast ADO with @uuid directly into proper Metadata. Check your data ROW in your CSV for set @setid for invalid data.',[ - '@uuid' => $data->info['row']['uuid'] ?? "MISSING UUID", - '@setid' => $data->info['set_id'] - ]); - - $this->loggerFactory->get('ami_file')->error($message ,[ - 'setid' => $data->info['set_id'] ?? NULL, - 'time_submitted' => $data->info['time_submitted'] ?? '', - ]); - $this->setStatus(amiSetEntity::STATUS_PROCESSING_WITH_ERRORS, $data); - return; - } - elseif (!isset($data->info['row']['data'])) { - $message = $this->t('Sorry, we can not cast an ADO directly into proper Metadata. Check your data ROW in your CSV for set @setid for invalid data.', - [ - '@setid' => $data->info['set_id'], - ]); - $this->loggerFactory->get('ami_file')->error($message ,[ - 'setid' => $data->info['set_id'] ?? NULL, - 'time_submitted' => $data->info['time_submitted'] ?? '', - ]); - $this->setStatus(amiSetEntity::STATUS_PROCESSING_WITH_ERRORS, $data); - return; - } - - $processed_metadata = $this->AmiUtilityService->expandJson($data->info['row']['data']); - $processed_metadata = !empty($processed_metadata) ? json_encode($processed_metadata) : NULL; - $json_error = json_last_error(); - if ($json_error !== JSON_ERROR_NONE || !$processed_metadata) { - $message = $this->t('Sorry, we can not cast ADO with @uuid directly into proper Metadata. Check your data ROW in your CSV for set @setid for invalid JSON data.',[ - '@uuid' => $data->info['row']['uuid'], - '@setid' => $data->info['set_id'] - ]); - $this->loggerFactory->get('ami_file')->error($message ,[ - 'setid' => $data->info['set_id'] ?? NULL, - 'time_submitted' => $data->info['time_submitted'] ?? 
'', - ]); - $this->setStatus(amiSetEntity::STATUS_PROCESSING_WITH_ERRORS, $data); - return; - } - } } } From c95618b65cb2d12c040ff348cd67ce89f5913273 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Wed, 1 May 2024 15:17:59 -0400 Subject: [PATCH 08/29] Logs & logs --- src/Plugin/QueueWorker/CsvADOQueueWorker.php | 25 ++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/src/Plugin/QueueWorker/CsvADOQueueWorker.php b/src/Plugin/QueueWorker/CsvADOQueueWorker.php index 3f207ae..003bbfb 100644 --- a/src/Plugin/QueueWorker/CsvADOQueueWorker.php +++ b/src/Plugin/QueueWorker/CsvADOQueueWorker.php @@ -77,6 +77,18 @@ public function processItem($data) { // But the ADO worker itself will (new code) extract a CSV and then again, enqueue back to this so this one can yet again // split into smaller chuncks and so on. $info = $this->AmiUtilityService->preprocessAmiSet($data->info['csv_file'], $data, $invalid, FALSE); + + if (!count($info)) { + $message = $this->t('So sorry. CSV for @setid produced no ADOs. Please correct your source CSV data', [ + '@setid' => $data->info['set_id'] + ]); + $this->loggerFactory->get('ami_file')->warning($message, [ + 'setid' => $data->info['set_id'] ?? NULL, + 'time_submitted' => $data->info['time_submitted'] ?? '', + ]); + return; + } + foreach ($info as $item) { // We set current User here since we want to be sure the final owner of // the object is this and not the user that runs the queue @@ -111,6 +123,19 @@ public function processItem($data) { 'time_submitted' => $data->info['time_submitted'] ?? 
'', ]); } + if (count($invalid)) { + $invalid_message = $this->formatPlural(count($invalid), + 'Source data Row @row had an issue, common cause is an invalid parent.', + '@count rows, @row, had issues, common causes are invalid parents and/or non existing referenced rows.', + [ + '@row' => implode(', ', array_keys($invalid)), + ] + ); + $this->loggerFactory->get('ami_file')->warning($invalid_message, [ + 'setid' => $data->info['set_id'] ?? NULL, + 'time_submitted' => $data->info['time_submitted'] ?? '', + ]); + } $processed_set_status = $this->statusStore->get('set_' . $data->info['set_id']); $processed_set_status['processed'] = $processed_set_status['processed'] ?? 0; $processed_set_status['errored'] = $processed_set_status['errored'] ?? 0; From 4de1802830589c38dd9157212b6194e8a514aa0f Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Wed, 1 May 2024 15:23:44 -0400 Subject: [PATCH 09/29] Explain what is passed to the CSV queue worker --- src/Plugin/QueueWorker/CsvADOQueueWorker.php | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/Plugin/QueueWorker/CsvADOQueueWorker.php b/src/Plugin/QueueWorker/CsvADOQueueWorker.php index 003bbfb..465c5b7 100644 --- a/src/Plugin/QueueWorker/CsvADOQueueWorker.php +++ b/src/Plugin/QueueWorker/CsvADOQueueWorker.php @@ -48,20 +48,28 @@ public function processItem($data) { // it simple for now. $this->loggerFactory->get('ami_file')->setLoggers([[$log]]); - /* Data info for an ADO has this structure + /* Data info for an CSV has this structure $data->info = [ + 'csv_file' => The CSV File that will (or we hope so if well formed) generate multiple ADO Queue items 'set_id' => The Set id 'uid' => The User ID that processed the Set 'set_url' => A direct URL to the set. + 'status' => Either a string (moderation state) or a 1/0 for published/unpublished if not moderated 'op_secondary' => applies only to Update/Patch operations. 
Can be one of 'update','replace','append' 'ops_safefiles' => Boolean, True if we will not allow files/mappings to be removed/we will keep them warm and safe 'log_jsonpatch' => If for Update operations we will generate a single PER ADO Log with a full JSON Patch, 'attempt' => The number of attempts to process. We always start with a 1 'zip_file' => Zip File/File Entity - 'csv_file' => The CSV that will generate the ADO queue items. 'queue_name' => because well ... we use Hydroponics too - 'time_submitted' => Timestamp on when the queue was sent. All Entries will share the same + 'force_file_queue' => defaults to false, will always treat files as separate queue items. + 'force_file_process' => defaults to false, will force all techmd and file fetching to happen from scratch instead of using cached versions. + 'manyfiles' => Number of files (passed by \Drupal\ami\Form\amiSetEntityProcessForm::submitForm) that will trigger queue processing for files, + 'ops_skip_onmissing_file' => Skips ADO operations if a passed/mapped file is not present, + 'ops_forcemanaged_destination_file' => Forces Archipelago to manage a files destination when the source matches the destination Schema (e.g S3), + 'time_submitted' => Timestamp on when the queue was send. All Entries will share the same ]; + + Most of this data will simply be relayed to another queue item. // This will simply go to an alternate processing on this same Queue Worker // Just for files. */ From 8c66488c22227ffdc1726043dfde56a181ccba34 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Wed, 1 May 2024 16:27:04 -0400 Subject: [PATCH 10/29] Process CSVs with less fuzz. We just need a file to pass to the queue. These files will be temporary ... so how to i make sure they are not delete? mmm.. i should be OK ... right? One choice would be to check YET again if the file passed is still there on the CSV queue worker and load if from a string there? mmmm ... 
--- .../QueueWorker/IngestADOQueueWorker.php | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/src/Plugin/QueueWorker/IngestADOQueueWorker.php b/src/Plugin/QueueWorker/IngestADOQueueWorker.php index 0dd8109..dbb56bb 100644 --- a/src/Plugin/QueueWorker/IngestADOQueueWorker.php +++ b/src/Plugin/QueueWorker/IngestADOQueueWorker.php @@ -395,16 +395,29 @@ public function processItem($data) { if ($data->mapping->globalmapping == "custom") { $file_object = $data->mapping->custommapping_settings->{$data->info['row']['type']}->files ?? NULL; + $csv_file_object = $data->mapping->custommapping_settings->{$data->info['row']['type']}->files_csv ?? NULL; } else { $file_object = $data->mapping->globalmapping_settings->files ?? NULL; + $csv_file_object = $data->mapping->globalmapping_settings->files_csv ?? NULL; } $file_columns = []; + $file_csv_columns = []; + + $ado_columns = []; if ($file_object && is_object($file_object)) { $file_columns = array_values(get_object_vars($file_object)); } + // CSV (nested ones) can not be processed as "pre-files", but still need to be processed as files. + // There might be an edge case where the user decides that the CSV that generated the children + // Should also be attached to the parent ADO.Still, we need to be sure the ADO itself was ingesed + // before treating the CSV as a source for children objects. + + if ($csv_file_object && is_object($csv_file_object)) { + $file_csv_columns = array_values(get_object_vars($csv_file_object)); + } if ($ado_object && is_object($ado_object)) { $ado_columns = array_values(get_object_vars($ado_object)); @@ -1023,6 +1036,25 @@ protected function processFile($data) { } } + /** + * Processes a CSV File without technical metadata. 
This is just for the purpose of input for the CSV queue worker + * + * @param mixed $data + */ + protected function processCSvFile($data): \Drupal\Core\Entity\EntityInterface|\Drupal\file\Entity\File|null + { + $zip_file_id = is_object($data->info['zip_file']) && $data->info['zip_file'] instanceof FileInterface ? (string) $data->info['zip_file']->id() : '0'; + $file = $this->AmiUtilityService->file_get(trim($data->info['filename']), + $data->info['zip_file'], TRUE); + if ($file && $file->getMimeType() == 'application/csv') { + return $file; + } + else { + return NULL; + } + + } + /** * Checks if processing can be done so we can bail out sooner. From 55a5a6be98ee60a76a3fdc462bd9cde74f763411 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Wed, 1 May 2024 17:28:22 -0400 Subject: [PATCH 11/29] Processes CSV after the Row was correctly processed @alliomeria there is a question for you at line 583. I have tons of those. For now I am assuming "only process children" if the row containing it was processed. But what if we want to process anyways? Also, another issue I will encounter is: Delete processed ADOs... I will have to parse again each CSV attached to the main one? --- src/Plugin/QueueWorker/CsvADOQueueWorker.php | 1 + .../QueueWorker/IngestADOQueueWorker.php | 46 ++++++++++++++++--- 2 files changed, 41 insertions(+), 6 deletions(-) diff --git a/src/Plugin/QueueWorker/CsvADOQueueWorker.php b/src/Plugin/QueueWorker/CsvADOQueueWorker.php index 465c5b7..ceacf45 100644 --- a/src/Plugin/QueueWorker/CsvADOQueueWorker.php +++ b/src/Plugin/QueueWorker/CsvADOQueueWorker.php @@ -51,6 +51,7 @@ public function processItem($data) { /* Data info for an CSV has this structure $data->info = [ 'csv_file' => The CSV File that will (or we hope so if well formed) generate multiple ADO Queue items + 'csv_file_name' => Only present if this is called not from the root 'set_id' => The Set id 'uid' => The User ID that processed the Set 'set_url' => A direct URL to the set.
diff --git a/src/Plugin/QueueWorker/IngestADOQueueWorker.php b/src/Plugin/QueueWorker/IngestADOQueueWorker.php index dbb56bb..a3b3f80 100644 --- a/src/Plugin/QueueWorker/IngestADOQueueWorker.php +++ b/src/Plugin/QueueWorker/IngestADOQueueWorker.php @@ -399,7 +399,7 @@ public function processItem($data) { } else { $file_object = $data->mapping->globalmapping_settings->files ?? NULL; - $csv_file_object = $data->mapping->globalmapping_settings->files_csv ?? NULL; + $csv_file_object = $data->mapping->globalmapping_settings->files_csv ?? NULL; } $file_columns = []; @@ -578,7 +578,37 @@ public function processItem($data) { return; } // Only persist if we passed this. - $this->persistEntity($data, $processed_metadata); + // True if all ok, to the best of our knowledge of course + $persisted = $this->persistEntity($data, $processed_metadata); + // @TODO another question for @alliomera. If i don't check if the ROW was ingested.. we could process a Child CSV + // EVEN if the parent object was A) there already OR, was non even valid. That could be useful? + // A Processing setting like "don't process if file missing?" + if ($persisted && !empty($file_csv_columns)) { + $current_uuid = $data->info['row']['uuid'] ?? NULL; + $current_row_id = $data->info['row']['row_id'] ?? NULL; + $data_csv = clone $data; + unset($data_csv->info['row']); + foreach ($file_csv_columns as $file_csv_column) { + if (isset($data->info['row']['data'][$file_csv_column]) && strlen(trim($data->info['row']['data'][$file_csv_column])) >= 5) { + $filenames = trim($data->info['row']['data'][$file_csv_column]); + $filenames = array_map(function($value) { + $value = $value ?? 
''; + return trim($value); + }, explode(';', $filenames)); + $filenames = array_filter($filenames); + foreach($filenames as $filename) { + $data_csv->info['csv_filename'] = $filename; + $csv_file = $this->processCSvFile($data_csv); + if ($csv_file) { + $data_csv->info['csv_file'] = $csv_file; + // Push to the CSV queue + \Drupal::queue('ami_csv_ado') + ->createItem($data_csv); + } + } + } + } + } } @@ -614,7 +644,7 @@ private function isRemote($uri) { private function persistEntity(\stdClass $data, array $processed_metadata) { if (!$this->canProcess($data)) { - return; + return FALSE; } //OP can be one of: @@ -890,6 +920,7 @@ function str_starts_with($haystack, $needle) { 'time_submitted' => $data->info['time_submitted'] ?? '', ]); $this->setStatus(amiSetEntity::STATUS_PROCESSING, $data); + return TRUE; } catch (\Exception $exception) { $message = $this->t('Sorry we did all right but failed @ophuman the ADO with UUID @uuid on Set @setid. Something went wrong. Please check your Drupal Logs and notify your admin.',[ @@ -902,7 +933,7 @@ function str_starts_with($haystack, $needle) { 'time_submitted' => $data->info['time_submitted'] ?? '', ]); $this->setStatus(amiSetEntity::STATUS_PROCESSING_WITH_ERRORS, $data); - return; + return FALSE; } } else { @@ -916,6 +947,7 @@ function str_starts_with($haystack, $needle) { 'time_submitted' => $data->info['time_submitted'] ?? '', ]); $this->setStatus(amiSetEntity::STATUS_PROCESSING_WITH_ERRORS, $data); + return FALSE; } } @@ -1043,8 +1075,11 @@ protected function processFile($data) { */ protected function processCSvFile($data): \Drupal\Core\Entity\EntityInterface|\Drupal\file\Entity\File|null { + if (!($data->info['csv_filename'] ?? NULL)) { + return NULL; + } $zip_file_id = is_object($data->info['zip_file']) && $data->info['zip_file'] instanceof FileInterface ? 
(string) $data->info['zip_file']->id() : '0'; - $file = $this->AmiUtilityService->file_get(trim($data->info['filename']), + $file = $this->AmiUtilityService->file_get(trim($data->info['csv_filename']), $data->info['zip_file'], TRUE); if ($file && $file->getMimeType() == 'application/csv') { return $file; @@ -1052,7 +1087,6 @@ protected function processCSvFile($data): \Drupal\Core\Entity\EntityInterface|\D else { return NULL; } - } From 7ab228d0f7b4edc9547b8403d2ef53860eb8c245 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Wed, 1 May 2024 18:00:13 -0400 Subject: [PATCH 12/29] Remove some old spaces in the Solr Importer Irrelevant but my eye (left one) was twitching --- src/Plugin/ImporterAdapter/SolrImporter.php | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Plugin/ImporterAdapter/SolrImporter.php b/src/Plugin/ImporterAdapter/SolrImporter.php index d062994..311082e 100644 --- a/src/Plugin/ImporterAdapter/SolrImporter.php +++ b/src/Plugin/ImporterAdapter/SolrImporter.php @@ -489,8 +489,6 @@ public function getInfo(array $config, FormStateInterface $form_state, $page = 0 } $filename_columns = array_unique(array_merge(static::FILE_COLUMNS,$filename_columns)); - - $solr_config = [ 'endpoint' => [ 'amiremote' => [ From 3aeb7fc8226ebc3b5e55affde82883b25193d63c Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Wed, 1 May 2024 18:01:15 -0400 Subject: [PATCH 13/29] Starts the actual Importer Note: I will also allow any plugin to set Fixed values and skip steps of the AMI setup main form That way we can totally avoid user input on things that need to be one way (like here) --- src/Plugin/ImporterAdapter/EADImporter.php | 235 +++++++++++++++++++++ 1 file changed, 235 insertions(+) create mode 100644 src/Plugin/ImporterAdapter/EADImporter.php diff --git a/src/Plugin/ImporterAdapter/EADImporter.php b/src/Plugin/ImporterAdapter/EADImporter.php new file mode 100644 index 0000000..74ccd98 --- /dev/null +++ b/src/Plugin/ImporterAdapter/EADImporter.php @@ -0,0 
+1,235 @@ +streamWrapperManager = $streamWrapperManager; + $this->tempFile = NULL; + register_shutdown_function([$this, 'shutdown']); + } + + /** + * {@inheritdoc} + */ + public static function create(ContainerInterface $container, array $configuration, $plugin_id, $plugin_definition) { + return new static( + $configuration, + $plugin_id, + $plugin_definition, + $container->get('entity_type.manager'), + $container->get('stream_wrapper_manager'), + $container->get('ami.utility') + ); + } + + + use StringTranslationTrait; + + /** + * {@inheritdoc} + */ + public function interactiveForm(array $parents, FormStateInterface $form_state):array { + $form = []; + $form = parent::interactiveForm($parents,$form_state); + $form['file'] = [ + '#type' => 'managed_file', + '#default_value' => $form_state->getValue(array_merge($parents , ['file'])), + '#title' => $this->t('Upload your file'), + '#description' => $this->t('The Spreadsheet file containing your EAD records.'), + '#required' => TRUE, + '#upload_location' => 'public://', + '#upload_validators' => [ + 'file_validate_extensions' => ['csv xls xlsx xlst tsv'], + ], + ]; + + return $form; + } + + + /** + * {@inheritdoc} + */ + public function getData(array $config, $page = 0, $per_page = 20): array { + $data = parent::getData($config, $page, $per_page); + $offset = $page * $per_page; + $tabdata = ['headers' => [], 'data' => $data, 'totalrows' => 0]; + + /* @var File $file */ + $file = $this->entityTypeManager->getStorage('file') + ->load($config['file'][0]); + if (!$file) { + $this->messenger()->addMessage( + $this->t( + 'Could not load the file. 
Please check your Drupal logs or contact your Repository Admin' + ) + ); + return $tabdata; + } + + $response = $this->AmiUtilityService->ensureFileAvailability($file); + + if ($response === TRUE) { + $file_path = $this->streamWrapperManager->getViaUri($file->getFileUri())->realpath(); + $this->streamWrapperManager->getViaUri($file->getFileUri())->getUri(); + } + elseif ($response === FALSE) { + $this->messenger()->addMessage( + $this->t( + 'Could not copy source file to a local location. Please check your Filesystem Permissions, Drupal logs or contact your Repository Admin' + ) + ); + return $tabdata; + } + else { + $this->tempFile = $response; + $file_path = $response; + } + + try { + $inputFileType = IOFactory::identify($file_path); + // Because of \PhpOffice\PhpSpreadsheet\Cell\DataType::checkString we can + // Not use this library for CSVs that contain large JSONs + // Since we do not know if they contain that, we will + // assume so (maybe a user choice in the future) + if ($inputFileType == 'Csv') { + return $this->AmiUtilityService->csv_read($file, 0, 0, TRUE) ?? $tabdata; + } + $objReader = IOFactory::createReader($inputFileType); + $objReader->setReadDataOnly(TRUE); + $objPHPExcel = $objReader->load($file_path); + } + catch (\Exception $e) { + $this->messenger()->addMessage( + $this->t( + 'Could not parse file with error: @error', + ['@error' => $e->getMessage()] + ) + ); + return $tabdata; + } + + $table = []; + $headers = []; + $maxRow = 0; + $worksheet = $objPHPExcel->getActiveSheet(); + $highestRow = $worksheet->getHighestRow(); + $highestColumn = $worksheet->getHighestDataColumn(1); + + if (($highestRow) > 1) { + // Returns Row Headers. + $rowHeaders = $worksheet->rangeToArray( + 'A1:' . $highestColumn . 
'1', + NULL, + TRUE, + TRUE, + FALSE + ); + $rowHeaders_utf8 = array_map('stripslashes', $rowHeaders[0]); + $rowHeaders_utf8 = array_map('utf8_encode', $rowHeaders_utf8); + $rowHeaders_utf8 = array_map('strtolower', $rowHeaders_utf8); + $rowHeaders_utf8 = array_map('trim', $rowHeaders_utf8); + $rowHeaders_utf8 = array_filter($rowHeaders_utf8); + $headercount = count($rowHeaders_utf8); + + foreach ($worksheet->getRowIterator() as $row) { + $rowindex = $row->getRowIndex(); + if (($rowindex > 1) && ($rowindex > ($offset)) && (($rowindex <= ($offset + $per_page + 1)) || $per_page == -1)) { + $rowdata = []; + // gets one row data + $datarow = $worksheet->rangeToArray( + "A{$rowindex}:" . $highestColumn . $rowindex, + NULL, + TRUE, + TRUE, + FALSE + ); + $flat = trim(implode('', $datarow[0])); + //check for empty row...if found stop there. + if (strlen($flat) == 0) { + $maxRow = $rowindex; + break; + } + + $row = $this->AmiUtilityService->arrayEquallySeize( + $headercount, + $datarow[0] + ); + $table[$rowindex] = $row; + } + $maxRow = $rowindex; + } + } + $tabdata = [ + 'headers' => $rowHeaders_utf8, + 'data' => $table, + 'totalrows' => $maxRow, + ]; + $objPHPExcel->disconnectWorksheets(); + return $tabdata; + } + + public function getInfo(array $config, FormStateInterface $form_state, $page = 0, $per_page = 20): array { + return $this->getData($config, $page, $per_page); + } + + /** + * Shutdown that "should" clean temp file if one was generated + */ + public function shutdown() { + // on PHP-FPM there will be never output of this one.. + if ($this->tempFile !== NULL) { + $this->AmiUtilityService->cleanUpTemp($this->tempFile); + } + } +} From dc2ac75c47809447b818aed69ea999f3a898ecfb Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Thu, 2 May 2024 10:39:58 -0400 Subject: [PATCH 14/29] basic stub, so far does nothing different than the normal spreadsheet importer Except it forces CSV and tries to save in private ... 
if that goes well i might shift all to private --- src/Plugin/ImporterAdapter/EADImporter.php | 73 ++-------------------- 1 file changed, 4 insertions(+), 69 deletions(-) diff --git a/src/Plugin/ImporterAdapter/EADImporter.php b/src/Plugin/ImporterAdapter/EADImporter.php index 74ccd98..907c06e 100644 --- a/src/Plugin/ImporterAdapter/EADImporter.php +++ b/src/Plugin/ImporterAdapter/EADImporter.php @@ -85,11 +85,11 @@ public function interactiveForm(array $parents, FormStateInterface $form_state): '#type' => 'managed_file', '#default_value' => $form_state->getValue(array_merge($parents , ['file'])), '#title' => $this->t('Upload your file'), - '#description' => $this->t('The Spreadsheet file containing your EAD records.'), + '#description' => $this->t('The CSV file containing your EAD records.'), '#required' => TRUE, - '#upload_location' => 'public://', + '#upload_location' => 'private://', '#upload_validators' => [ - 'file_validate_extensions' => ['csv xls xlsx xlst tsv'], + 'file_validate_extensions' => ['csv'], ], ]; @@ -135,19 +135,13 @@ public function getData(array $config, $page = 0, $per_page = 20): array { $this->tempFile = $response; $file_path = $response; } - try { $inputFileType = IOFactory::identify($file_path); // Because of \PhpOffice\PhpSpreadsheet\Cell\DataType::checkString we can // Not use this library for CSVs that contain large JSONs // Since we do not know if they contain that, we will // assume so (maybe a user choice in the future) - if ($inputFileType == 'Csv') { - return $this->AmiUtilityService->csv_read($file, 0, 0, TRUE) ?? $tabdata; - } - $objReader = IOFactory::createReader($inputFileType); - $objReader->setReadDataOnly(TRUE); - $objPHPExcel = $objReader->load($file_path); + return $this->AmiUtilityService->csv_read($file, 0, 0, TRUE) ?? 
$tabdata; } catch (\Exception $e) { $this->messenger()->addMessage( @@ -158,65 +152,6 @@ public function getData(array $config, $page = 0, $per_page = 20): array { ); return $tabdata; } - - $table = []; - $headers = []; - $maxRow = 0; - $worksheet = $objPHPExcel->getActiveSheet(); - $highestRow = $worksheet->getHighestRow(); - $highestColumn = $worksheet->getHighestDataColumn(1); - - if (($highestRow) > 1) { - // Returns Row Headers. - $rowHeaders = $worksheet->rangeToArray( - 'A1:' . $highestColumn . '1', - NULL, - TRUE, - TRUE, - FALSE - ); - $rowHeaders_utf8 = array_map('stripslashes', $rowHeaders[0]); - $rowHeaders_utf8 = array_map('utf8_encode', $rowHeaders_utf8); - $rowHeaders_utf8 = array_map('strtolower', $rowHeaders_utf8); - $rowHeaders_utf8 = array_map('trim', $rowHeaders_utf8); - $rowHeaders_utf8 = array_filter($rowHeaders_utf8); - $headercount = count($rowHeaders_utf8); - - foreach ($worksheet->getRowIterator() as $row) { - $rowindex = $row->getRowIndex(); - if (($rowindex > 1) && ($rowindex > ($offset)) && (($rowindex <= ($offset + $per_page + 1)) || $per_page == -1)) { - $rowdata = []; - // gets one row data - $datarow = $worksheet->rangeToArray( - "A{$rowindex}:" . $highestColumn . $rowindex, - NULL, - TRUE, - TRUE, - FALSE - ); - $flat = trim(implode('', $datarow[0])); - //check for empty row...if found stop there. 
- if (strlen($flat) == 0) { - $maxRow = $rowindex; - break; - } - - $row = $this->AmiUtilityService->arrayEquallySeize( - $headercount, - $datarow[0] - ); - $table[$rowindex] = $row; - } - $maxRow = $rowindex; - } - } - $tabdata = [ - 'headers' => $rowHeaders_utf8, - 'data' => $table, - 'totalrows' => $maxRow, - ]; - $objPHPExcel->disconnectWorksheets(); - return $tabdata; } public function getInfo(array $config, FormStateInterface $form_state, $page = 0, $per_page = 20): array { From b6c5dc2fe86a79623c6cd9ba48b565239d89343e Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Thu, 2 May 2024 18:21:12 -0400 Subject: [PATCH 15/29] Just clean up old comments and move a getter up a bit --- src/Form/AmiMultiStepIngest.php | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/src/Form/AmiMultiStepIngest.php b/src/Form/AmiMultiStepIngest.php index 1a2cb35..71793ec 100644 --- a/src/Form/AmiMultiStepIngest.php +++ b/src/Form/AmiMultiStepIngest.php @@ -129,13 +129,17 @@ public function buildForm(array $form, FormStateInterface $form_state) { 'template' => 'Template', ]; $template = $this->getMetadatadisplays(); - // $webform = $this->getWebforms(); $bundle = $this->getBundlesAndFields(); $global_metadata_options = $metadata + ['custom' => 'Custom (Expert Mode)']; //Each row (based on its type column) can have its own approach setup(expert mode) $element_conditional = []; $element = []; + // Get all headers and check for a 'type' key first, if not allow the user to select one? + // Wonder if we can be strict about this and simply require always a "type"? + // @TODO WE need to check for 'type' always. Maybe even in the submit handler? 
+ $alltypes = $plugin_instance->provideTypes($pluginconfig, $data); + $element['bundle'] =[ '#type' => 'select', '#title' => $this->t('Fields and Bundles'), @@ -150,15 +154,6 @@ public function buildForm(array $form, FormStateInterface $form_state) { '#description' => $this->t('Columns will be casted to ADO metadata (JSON) using a Twig template setup for JSON output'), ]; - /** - * $element_conditional['webform'] = [ - * '#type' => 'select', - * '#title' => $this->t('Webform'), - * '#options' => $webform, - * '#description' => $this->t('Columns are casted to ADO metadata (JSON) by passing/validating Data through an existing Webform'), - * ]; - */ - $form['ingestsetup']['globalmapping'] = [ '#type' => 'select', '#title' => $this->t('Select the data transformation approach'), @@ -202,10 +197,6 @@ public function buildForm(array $form, FormStateInterface $form_state) { ], ]; - // Get all headers and check for a 'type' key first, if not allow the user to select one? - // Wonder if we can be strict about this and simply require always a "type"? - // @TODO WE need to check for 'type' always. Maybe even in the submit handler? 
- $alltypes = $plugin_instance->provideTypes($pluginconfig, $data); if (!empty($alltypes)) { $form['ingestsetup']['custommapping'] = [ '#type' => 'fieldset', @@ -473,12 +464,10 @@ public function validateMapping(array &$form, FormStateInterface $form_state) { public function submitForm(array &$form, FormStateInterface $form_state) { parent::submitForm($form, $form_state); if ($form_state->getValue('plugin', NULL)) { - if ($this->store->get('plugin') != $form_state->getValue('plugin', NULL)) { $this->store->set('pluginconfig',[]); } $this->store->set('plugin', $form_state->getValue('plugin')); - } if ($form_state->getValue('pluginconfig', [])) { $this->store->set('pluginconfig', $form_state->getValue('pluginconfig')); @@ -494,7 +483,7 @@ public function submitForm(array &$form, FormStateInterface $form_state) { $ready = $form_state->getValue('pluginconfig')['ready'] ?? TRUE; $op = $form_state->getValue('pluginconfig')['op'] ?? 'create'; if (!$ready) { - // Back yo Step 2 until the Plugin is ready doing its thing. + // Back to Step 2 until the Plugin is ready doing its thing. $this->step = 2; $form_state->setRebuild(); } From 57ef3f196c9f144b1388094a7daefb2c8e4bcc8c Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Thu, 2 May 2024 18:21:44 -0400 Subject: [PATCH 16/29] Remove wrong constructor.
We are extending spreadsheet --- src/Plugin/ImporterAdapter/EADImporter.php | 54 ++++------------------ 1 file changed, 10 insertions(+), 44 deletions(-) diff --git a/src/Plugin/ImporterAdapter/EADImporter.php b/src/Plugin/ImporterAdapter/EADImporter.php index 907c06e..cf21e9b 100644 --- a/src/Plugin/ImporterAdapter/EADImporter.php +++ b/src/Plugin/ImporterAdapter/EADImporter.php @@ -29,50 +29,6 @@ class EADImporter extends SpreadsheetImporter { use StringTranslationTrait; use MessengerTrait; - /** - * @var \Drupal\Core\StreamWrapper\StreamWrapperManagerInterface - */ - protected StreamWrapperManagerInterface $streamWrapperManager; - - /** - * @var string|null - */ - protected ?string $tempFile; - - /** - * SpreadsheetImporter constructor. - * - * @param array $configuration - * @param $plugin_id - * @param $plugin_definition - * @param \Drupal\Core\Entity\EntityTypeManagerInterface $entityTypeManager - * @param \Drupal\Core\StreamWrapper\StreamWrapperManagerInterface $streamWrapperManager - * @param \Drupal\ami\AmiUtilityService $ami_utility - * - * @throws \Drupal\Component\Plugin\Exception\PluginException - */ - public function __construct(array $configuration, $plugin_id, $plugin_definition, EntityTypeManagerInterface $entityTypeManager, StreamWrapperManagerInterface $streamWrapperManager, AmiUtilityService $ami_utility) { - parent::__construct($configuration, $plugin_id, $plugin_definition, $entityTypeManager, $ami_utility); - $this->streamWrapperManager = $streamWrapperManager; - $this->tempFile = NULL; - register_shutdown_function([$this, 'shutdown']); - } - - /** - * {@inheritdoc} - */ - public static function create(ContainerInterface $container, array $configuration, $plugin_id, $plugin_definition) { - return new static( - $configuration, - $plugin_id, - $plugin_definition, - $container->get('entity_type.manager'), - $container->get('stream_wrapper_manager'), - $container->get('ami.utility') - ); - } - - use StringTranslationTrait; /** @@ -158,6 +114,16 
@@ public function getInfo(array $config, FormStateInterface $form_state, $page = 0 return $this->getData($config, $page, $per_page); } + public function provideKeys(array $config, array $data): array + { + // These are our discussed types. No flexibility here. + return [ + 'ArchiveContainer' => 'ArchiveContainer', + 'ArchiveComponent' => 'ArchiveComponent', + ]; + } + + /** * Shutdown that "should" clean temp file if one was generated */ From ff97f7c1aa114c415c57eeb49f5a1421d011af08 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Thu, 2 May 2024 18:22:13 -0400 Subject: [PATCH 17/29] Old comment. Just a comment but Spring clean ups are kay Diego --- src/Plugin/ImporterAdapter/SolrImporter.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Plugin/ImporterAdapter/SolrImporter.php b/src/Plugin/ImporterAdapter/SolrImporter.php index 311082e..1ae254f 100644 --- a/src/Plugin/ImporterAdapter/SolrImporter.php +++ b/src/Plugin/ImporterAdapter/SolrImporter.php @@ -66,7 +66,7 @@ class SolrImporter extends SpreadsheetImporter { protected $httpClient; /** - * GoogleSheetImporter constructor. + * Solr Importer constructor. * * @param array $configuration * @param $plugin_id From d4a346518b72337f384044f2d524472167a9b7f5 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Thu, 2 May 2024 19:23:30 -0400 Subject: [PATCH 18/29] Use the new CSV worker for background No longer waiting for all CSV rows to be read if that is the case. 
--- src/Form/amiSetEntityProcessForm.php | 136 +++++++++++++-------------- 1 file changed, 67 insertions(+), 69 deletions(-) diff --git a/src/Form/amiSetEntityProcessForm.php b/src/Form/amiSetEntityProcessForm.php index 0eaaf32..6e54085 100644 --- a/src/Form/amiSetEntityProcessForm.php +++ b/src/Form/amiSetEntityProcessForm.php @@ -115,35 +115,6 @@ public function submitForm(array &$form, FormStateInterface $form_state) { } if ($file && $data !== new \stdClass()) { $invalid = []; - - - - - $info = $this->AmiUtilityService->preprocessAmiSet($file, $data, $invalid, FALSE); - // Means preprocess set - if (count($invalid)) { - $invalid_message = $this->formatPlural(count($invalid), - 'Source data Row @row had an issue, common cause is an invalid parent.', - '@count rows, @row, had issues, common causes are invalid parents and/or non existing referenced rows.', - [ - '@row' => implode(', ', array_keys($invalid)), - ] - ); - $this->messenger()->addWarning($invalid_message); - } - if (!count($info)) { - $this->messenger()->addError( - $this->t( - 'So Sorry. Ami Set @label produced no ADOs. Please correct your source CSV data.', - [ - '@label' => $this->entity->label(), - ] - ) - ); - $form_state->setRebuild(); - return; - } - $SetURL = $this->entity->toUrl('canonical', ['absolute' => TRUE]) ->toString(); @@ -173,61 +144,87 @@ public function submitForm(array &$form, FormStateInterface $form_state) { $op_secondary = $form_state->getValue(['ops_secondary','ops_secondary_update'], 'update'); $ops_safefiles = $form_state->getValue(['ops_secondary','ops_safefiles'], TRUE); } - $data_csv = clone $data; - - - - // Testing the CSV processor - $data_csv->info = [ - 'zip_file' => $zip_file, - 'csv_file' => $file, - 'set_id' => $this->entity->id(), - 'uid' => $this->currentUser()->id(), - 'status' => $statuses, - 'op_secondary' => $op_secondary, - 'ops_safefiles' => $ops_safefiles ? 
TRUE: FALSE, - 'log_jsonpatch' => FALSE, - 'set_url' => $SetURL, - 'attempt' => 1, - 'queue_name' => $queue_name, - 'force_file_queue' => (bool) $form_state->getValue('force_file_queue', FALSE), - 'force_file_process' => (bool) $form_state->getValue('force_file_process', FALSE), - 'manyfiles' => $manyfiles, - 'ops_skip_onmissing_file' => $ops_skip_onmissing_file, - 'ops_forcemanaged_destination_file' => $ops_forcemanaged_destination_file, - 'time_submitted' => $run_timestamp - ]; - \Drupal::queue('ami_csv_ado') - ->createItem($data_csv); - - foreach ($info as $item) { - // We set current User here since we want to be sure the final owner of - // the object is this and not the user that runs the queue - $data->info = [ + if ($notprocessnow) { + $data_csv = clone $data; + // Testing the CSV processor + $data_csv->info = [ 'zip_file' => $zip_file, - 'row' => $item, + 'csv_file' => $file, 'set_id' => $this->entity->id(), 'uid' => $this->currentUser()->id(), 'status' => $statuses, 'op_secondary' => $op_secondary, - 'ops_safefiles' => $ops_safefiles ? TRUE: FALSE, + 'ops_safefiles' => $ops_safefiles ? 
TRUE : FALSE, 'log_jsonpatch' => FALSE, 'set_url' => $SetURL, 'attempt' => 1, 'queue_name' => $queue_name, - 'force_file_queue' => (bool) $form_state->getValue('force_file_queue', FALSE), - 'force_file_process' => (bool) $form_state->getValue('force_file_process', FALSE), + 'force_file_queue' => (bool)$form_state->getValue('force_file_queue', FALSE), + 'force_file_process' => (bool)$form_state->getValue('force_file_process', FALSE), 'manyfiles' => $manyfiles, 'ops_skip_onmissing_file' => $ops_skip_onmissing_file, 'ops_forcemanaged_destination_file' => $ops_forcemanaged_destination_file, 'time_submitted' => $run_timestamp ]; - $added[] = \Drupal::queue($queue_name) - ->createItem($data); + \Drupal::queue('ami_csv_ado') + ->createItem($data_csv); + } + else { + $info = $this->AmiUtilityService->preprocessAmiSet($file, $data, $invalid, FALSE); + // Means preprocess set + if (count($invalid)) { + $invalid_message = $this->formatPlural(count($invalid), + 'Source data Row @row had an issue, common cause is an invalid parent.', + '@count rows, @row, had issues, common causes are invalid parents and/or non existing referenced rows.', + [ + '@row' => implode(', ', array_keys($invalid)), + ] + ); + $this->messenger()->addWarning($invalid_message); + } + if (!count($info)) { + $this->messenger()->addError( + $this->t( + 'So Sorry. Ami Set @label produced no ADOs. Please correct your source CSV data.', + [ + '@label' => $this->entity->label(), + ] + ) + ); + $form_state->setRebuild(); + return; + } + + + foreach ($info as $item) { + // We set current User here since we want to be sure the final owner of + // the object is this and not the user that runs the queue + $data->info = [ + 'zip_file' => $zip_file, + 'row' => $item, + 'set_id' => $this->entity->id(), + 'uid' => $this->currentUser()->id(), + 'status' => $statuses, + 'op_secondary' => $op_secondary, + 'ops_safefiles' => $ops_safefiles ? 
TRUE : FALSE, + 'log_jsonpatch' => FALSE, + 'set_url' => $SetURL, + 'attempt' => 1, + 'queue_name' => $queue_name, + 'force_file_queue' => (bool)$form_state->getValue('force_file_queue', FALSE), + 'force_file_process' => (bool)$form_state->getValue('force_file_process', FALSE), + 'manyfiles' => $manyfiles, + 'ops_skip_onmissing_file' => $ops_skip_onmissing_file, + 'ops_forcemanaged_destination_file' => $ops_forcemanaged_destination_file, + 'time_submitted' => $run_timestamp + ]; + $added[] = \Drupal::queue($queue_name) + ->createItem($data); + } + $count = count(array_filter($added)); } - $count = count(array_filter($added)); - if ($notprocessnow && $count) { + if ($notprocessnow) { $this->messenger()->addMessage( $this->t( 'Set @label enqueued and processed .', @@ -239,7 +236,8 @@ public function submitForm(array &$form, FormStateInterface $form_state) { $processed_set_status['processed'] = 0; $processed_set_status['errored'] = 0; - $processed_set_status['total'] = $count; + $processed_set_status['total'] = 0; + // So far here, with the new CSV enqueue plugin we have no idea how many. But the CSV queue entry will fill up the gap $this->statusStore->set('set_' . 
$this->entity->id(), $processed_set_status); $this->entity->setStatus(amiSetEntity::STATUS_ENQUEUED); $this->entity->save(); From c281a3d897157da97323029988bd20cbf093db6d Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Fri, 3 May 2024 15:05:20 -0400 Subject: [PATCH 19/29] Add the option of altering both step forms and the step storage --- src/Plugin/ImporterAdapterBase.php | 18 ++++++++++++++++ src/Plugin/ImporterAdapterInterface.php | 28 +++++++++++++++++++++++++ 2 files changed, 46 insertions(+) diff --git a/src/Plugin/ImporterAdapterBase.php b/src/Plugin/ImporterAdapterBase.php index c8c64a8..77c852b 100644 --- a/src/Plugin/ImporterAdapterBase.php +++ b/src/Plugin/ImporterAdapterBase.php @@ -13,6 +13,7 @@ use Drupal\Core\StringTranslation\StringTranslationTrait; use Drupal\ami\Entity\ImporterAdapterInterface; use Drupal\ami\Plugin\ImporterAdapterInterface as ImporterPluginAdapterInterface ; +use Drupal\Core\TempStore\PrivateTempStore; use Symfony\Component\DependencyInjection\ContainerInterface; use Drupal\file\Entity\File; @@ -159,5 +160,22 @@ public function provideTypes(array $config, array $data): array { return $alltypes; } + /** + * During a Multistep Ingest Form Setup we can alter any steps/generated data + * @see \Drupal\ami\Form\AmiMultiStepIngestBaseForm + * + * @param $step + * @param PrivateTempStore $store + * @return void + */ + public function alterStepStore($step, PrivateTempStore $store):void { + + } + + public function stepFormAlter(&$form, FormStateInterface $form_state, PrivateTempStore $store, $step): void + { + // TODO: Implement stepFormAlter() method. 
+ } + } diff --git a/src/Plugin/ImporterAdapterInterface.php b/src/Plugin/ImporterAdapterInterface.php index 815fba6..8213693 100644 --- a/src/Plugin/ImporterAdapterInterface.php +++ b/src/Plugin/ImporterAdapterInterface.php @@ -5,6 +5,7 @@ use Drupal\ami\Plugin\ImporterAdapterInterface as ImporterPluginAdapterInterface; use Drupal\Component\Plugin\PluginInspectionInterface; use Drupal\Core\Form\FormStateInterface; +use Drupal\Core\TempStore\PrivateTempStore; use Drupal\file\Entity\File; /** @@ -51,6 +52,22 @@ public function settingsForm(array $parents, FormStateInterface $form_state): ar public function interactiveForm(array $parents, FormStateInterface $form_state): array; + /** + * Allows the Step form to be altered by reference. + * + * @param $form + * @param FormStateInterface $form_state + * @param PrivateTempStore $store + * @param int $step + * @return array + * @see \Drupal\ami\Form\AmiMultiStepIngestBaseForm + * Each plugin is responsible for providing a Form step that is compatible with the + * AmiMultiStepIngestBaseForm + * + */ + public function stepFormAlter(&$form, FormStateInterface $form_state, PrivateTempStore $store, $step):void; + + /** * Get Data from the source * @@ -130,4 +147,15 @@ public function provideKeys(array $config, array $data):array; */ public function provideTypes(array $config, array $data):array; + /** + * During a Multistep Ingest Form Setup we can alter any steps/generated data + * + * @see \Drupal\ami\Form\AmiMultiStepIngestBaseForm + * + * @param $step + * @param PrivateTempStore $store + * @return void + */ + public function alterStepStore($step, PrivateTempStore $store):void; + } From 881b7fcfa23d5b0796027d0b759c18746a0a0045 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Fri, 3 May 2024 15:33:58 -0400 Subject: [PATCH 20/29] Move plugin initialization to the top so we can reuse it everywhere adds the $plugin_instance->stepFormAlter($form, $form_state, $this->store, $this->step); only if step >1. 
Could be really larger than 2 (bc step 2 is the actual plugin form .. but this would also allow a plugin to not define that, but add complex logic for its own $plugin_instance->interactiveForm($parents, $form_state) instead of $plugin_instance->interactiveForm($parents, $form_state); --- src/Form/AmiMultiStepIngest.php | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/Form/AmiMultiStepIngest.php b/src/Form/AmiMultiStepIngest.php index 71793ec..d41516b 100644 --- a/src/Form/AmiMultiStepIngest.php +++ b/src/Form/AmiMultiStepIngest.php @@ -74,6 +74,12 @@ public function __construct(PrivateTempStoreFactory $temp_store_factory, Session */ public function buildForm(array $form, FormStateInterface $form_state) { $form = parent::buildForm($form, $form_state); + + + /* @var $plugin_instance \Drupal\ami\Plugin\ImporterAdapterInterface | NULL */ + $plugin_instance = $this->store->get('plugininstance'); + $pluginValue = $this->store->get('plugin'); + $form['message-step'] = [ '#markup' => '
' . $this->t('AMI step @step of @laststep',[ '@step' => $this->step, @@ -101,10 +107,7 @@ public function buildForm(array $form, FormStateInterface $form_state) { if ($this->step == 2) { $parents = ['pluginconfig']; $form_state->setValue('pluginconfig', $this->store->get('pluginconfig')); - $pluginValue = $this->store->get('plugin'); // Only create a new instance if we do not have the PluginInstace around - /* @var $plugin_instance \Drupal\ami\Plugin\ImporterAdapterInterface | NULL */ - $plugin_instance = $this->store->get('plugininstance'); if (!$plugin_instance || $plugin_instance->getPluginid() != $pluginValue || $pluginValue == NULL) { $configuration = []; $configuration['config'] = ImporterAdapter::create(); @@ -114,13 +117,12 @@ public function buildForm(array $form, FormStateInterface $form_state) { $form['pluginconfig'] = $plugin_instance->interactiveForm($parents, $form_state); $form['pluginconfig']['#tree'] = TRUE; } - // TO keep this discrete and easier to edit maybe move to it's own method? + // To keep this discrete and easier to edit maybe move to their own methods? if ($this->step == 3) { // We should never reach this point if data is not enough. Submit handler // will go back to Step 2 if so. $data = $this->store->get('data') ?? []; $pluginconfig = $this->store->get('pluginconfig'); - $plugin_instance = $this->store->get('plugininstance'); $op = $pluginconfig['op']; $column_keys = $plugin_instance->provideKeys($pluginconfig, $data); $mapping = $this->store->get('mapping'); @@ -287,7 +289,6 @@ public function buildForm(array $form, FormStateInterface $form_state) { $data = $this->store->get('data') ?? 
[]; $pluginconfig = $this->store->get('pluginconfig'); $op = $pluginconfig['op']; - $plugin_instance = $this->store->get('plugininstance'); $column_keys = $plugin_instance->provideKeys($pluginconfig, $data); $column_options = array_combine($column_keys, $column_keys); $mapping = $this->store->get('mapping'); @@ -308,8 +309,6 @@ public function buildForm(array $form, FormStateInterface $form_state) { $node_description = $this->t('Columns that hold either other row numbers or UUIDs(an existing ADO) connecting ADOs between each other (e.g "ismemberof"). You can choose multiple.'); } - - $form['ingestsetup']['adomapping']['parents'] = [ '#type' => 'select', '#title' => $this->t('ADO Parent Columns'), @@ -420,6 +419,10 @@ public function buildForm(array $form, FormStateInterface $form_state) { '#default_value' => 'AMI Set of ' . $this->currentUser()->getDisplayName() ]; } + // Allow the plugin to alter the forms if needed in any way + if ($plugin_instance && $this->step > 1) { + $plugin_instance->stepFormAlter($form, $form_state, $this->store, $this->step); + } return $form; } @@ -476,7 +479,6 @@ public function submitForm(array &$form, FormStateInterface $form_state) { if ($this->step == 3) { $this->store->delete('data'); /* @var $plugin_instance \Drupal\ami\Plugin\ImporterAdapterInterface| NULL */ - $plugin_instance = $this->store->get('plugininstance'); if ($plugin_instance) { $data = $plugin_instance->getInfo($this->store->get('pluginconfig'), $form_state,0,-1); // Check if the Plugin is ready processing or needs more data @@ -557,8 +559,6 @@ public function submitForm(array &$form, FormStateInterface $form_state) { $amisetdata->adomapping = $this->store->get('adomapping'); $amisetdata->zip = $this->store->get('zip'); $amisetdata->name = $ami_set_label; - /* @var $plugin_instance \Drupal\ami\Plugin\ImporterAdapterInterface| NULL */ - $plugin_instance = $this->store->get('plugininstance'); if ($plugin_instance) { if (!$plugin_instance->getPluginDefinition()['batch']) 
{ $data = $plugin_instance->getData($this->store->get('pluginconfig'), From 1fe5c2973bddca18ede9466a18da7b3a83246d27 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Tue, 7 May 2024 16:12:10 -0400 Subject: [PATCH 21/29] What was a thinking? restore this on the submit handler --- src/Form/AmiMultiStepIngest.php | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Form/AmiMultiStepIngest.php b/src/Form/AmiMultiStepIngest.php index d41516b..a162f34 100644 --- a/src/Form/AmiMultiStepIngest.php +++ b/src/Form/AmiMultiStepIngest.php @@ -479,6 +479,7 @@ public function submitForm(array &$form, FormStateInterface $form_state) { if ($this->step == 3) { $this->store->delete('data'); /* @var $plugin_instance \Drupal\ami\Plugin\ImporterAdapterInterface| NULL */ + $plugin_instance = $this->store->get('plugininstance'); if ($plugin_instance) { $data = $plugin_instance->getInfo($this->store->get('pluginconfig'), $form_state,0,-1); // Check if the Plugin is ready processing or needs more data From 4e4a9fac82e5a76a6d62a83a806a779f9196c8c1 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Tue, 7 May 2024 16:13:05 -0400 Subject: [PATCH 22/29] Provide Fixed types. 
for EAD & alter the ingest form so it is always custom we will only parse 'ArchiveContainer' 'ArchiveComponent' --- src/Plugin/ImporterAdapter/EADImporter.php | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/src/Plugin/ImporterAdapter/EADImporter.php b/src/Plugin/ImporterAdapter/EADImporter.php index cf21e9b..18f9a2d 100644 --- a/src/Plugin/ImporterAdapter/EADImporter.php +++ b/src/Plugin/ImporterAdapter/EADImporter.php @@ -9,6 +9,7 @@ use Drupal\Core\StreamWrapper\StreamWrapperManagerInterface; use Drupal\Core\StringTranslation\StringTranslationTrait; use Drupal\ami\Plugin\ImporterAdapterBase; +use Drupal\Core\TempStore\PrivateTempStore; use PhpOffice\PhpSpreadsheet\IOFactory; use Symfony\Component\DependencyInjection\ContainerInterface; use Drupal\file\Entity\File; @@ -114,7 +115,7 @@ public function getInfo(array $config, FormStateInterface $form_state, $page = 0 return $this->getData($config, $page, $per_page); } - public function provideKeys(array $config, array $data): array + public function provideTypes(array $config, array $data): array { // These are our discussed types. No flexibility here. 
return [ @@ -123,7 +124,6 @@ public function provideKeys(array $config, array $data): array ]; } - /** * Shutdown that "should" clean temp file if one was generated */ @@ -133,4 +133,17 @@ public function shutdown() { $this->AmiUtilityService->cleanUpTemp($this->tempFile); } } + + public function stepFormAlter(&$form, FormStateInterface $form_state, PrivateTempStore $store, $step): void + { + if ($step == 3) + $form['ingestsetup']['globalmapping'] = [ + '#type' => 'select', + '#title' => $this->t('Select the data transformation approach'), + '#default_value' => 'custom', + '#options' => ['custom' => 'Custom (Expert Mode)'], + '#description' => $this->t('How your source data will be transformed into EADs Metadata.'), + '#required' => TRUE, + ]; + } } From 5090603d2670743426fc6b92f6f720bfc9b2fc55 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Tue, 7 May 2024 18:10:38 -0400 Subject: [PATCH 23/29] Alters the custom mapping step for EAD only via templates --- src/Plugin/ImporterAdapter/EADImporter.php | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/Plugin/ImporterAdapter/EADImporter.php b/src/Plugin/ImporterAdapter/EADImporter.php index 18f9a2d..c8b668e 100644 --- a/src/Plugin/ImporterAdapter/EADImporter.php +++ b/src/Plugin/ImporterAdapter/EADImporter.php @@ -136,7 +136,7 @@ public function shutdown() { public function stepFormAlter(&$form, FormStateInterface $form_state, PrivateTempStore $store, $step): void { - if ($step == 3) + if ($step == 3) { $form['ingestsetup']['globalmapping'] = [ '#type' => 'select', '#title' => $this->t('Select the data transformation approach'), @@ -145,5 +145,15 @@ public function stepFormAlter(&$form, FormStateInterface $form_state, PrivateTem '#description' => $this->t('How your source data will be transformed into EADs Metadata.'), '#required' => TRUE, ]; + foreach ($form['ingestsetup']['custommapping'] ?? 
[] as $key => &$settings) { + if (strpos($key,'#') !== 0 && is_array($settings)) { + if ($settings['metadata']['#default_value'] ?? NULL) { + $form['ingestsetup']['custommapping'][$key]['metadata']['#default_value'] = 'template'; + $form['ingestsetup']['custommapping'][$key]['metadata']['#options'] = ['template' => 'Template']; + } + } + } + } + $form = $form; } } From 048a73831752cb7d13caf4b3df76d1149195aab9 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Tue, 7 May 2024 21:41:47 -0400 Subject: [PATCH 24/29] Code ready for testing (gosh) @alliomeria will give it a spin tomorrow AM. I had to make some hard choices. Automatic UUID generation at this moment of the project is a bad idea. Anything can go wrong, missing ID to generate, repeated UUIDs on the parents, etc, etc. So i will better make the scrip that generates the EAD structure well formed and public and all UUID generation will have there. Once we have tested this the actual script will become part of the process (one can uploads XMLs instead of a CSV) --- src/Form/AmiMultiStepIngest.php | 39 +++++++++++----- src/Plugin/ImporterAdapter/EADImporter.php | 45 ++++++++++++++++++- src/Plugin/ImporterAdapterBase.php | 17 +++---- src/Plugin/ImporterAdapterInterface.php | 10 ++--- .../QueueWorker/IngestADOQueueWorker.php | 4 +- 5 files changed, 84 insertions(+), 31 deletions(-) diff --git a/src/Form/AmiMultiStepIngest.php b/src/Form/AmiMultiStepIngest.php index a162f34..2fda14b 100644 --- a/src/Form/AmiMultiStepIngest.php +++ b/src/Form/AmiMultiStepIngest.php @@ -75,17 +75,14 @@ public function __construct(PrivateTempStoreFactory $temp_store_factory, Session public function buildForm(array $form, FormStateInterface $form_state) { $form = parent::buildForm($form, $form_state); - - /* @var $plugin_instance \Drupal\ami\Plugin\ImporterAdapterInterface | NULL */ - $plugin_instance = $this->store->get('plugininstance'); - $pluginValue = $this->store->get('plugin'); - $form['message-step'] = [ - '#markup' => '
' . $this->t('AMI step @step of @laststep',[ + '#markup' => '
' . $this->t('AMI step @step of @laststep', [ '@step' => $this->step, '@laststep' => $this->lastStep, ]) . '
', ]; + $plugin_instance = NULL; + $pluginValue = NULL; if ($this->step == 1) { $pluginValue = $this->store->get('plugin'); $definitions = $this->importerManager->getDefinitions(); @@ -93,6 +90,8 @@ public function buildForm(array $form, FormStateInterface $form_state) { foreach ($definitions as $id => $definition) { $options[$id] = $definition['label']; } + // Reset the plugin instance in case of code change. + $this->store->set('plugininstance', NULL); $form['plugin'] = [ '#type' => 'select', @@ -104,6 +103,12 @@ public function buildForm(array $form, FormStateInterface $form_state) { '#empty_option' => $this->t('- Please select a plugin -'), ]; } + if ($this->step > 1) { + //Only carry the plugin instance + /* @var $plugin_instance \Drupal\ami\Plugin\ImporterAdapterInterface | NULL */ + $plugin_instance = $this->store->get('plugininstance'); + $pluginValue = $this->store->get('plugin'); + } if ($this->step == 2) { $parents = ['pluginconfig']; $form_state->setValue('pluginconfig', $this->store->get('pluginconfig')); @@ -111,12 +116,13 @@ public function buildForm(array $form, FormStateInterface $form_state) { if (!$plugin_instance || $plugin_instance->getPluginid() != $pluginValue || $pluginValue == NULL) { $configuration = []; $configuration['config'] = ImporterAdapter::create(); - $plugin_instance = $this->importerManager->createInstance($pluginValue,$configuration); - $this->store->set('plugininstance',$plugin_instance); + $plugin_instance = $this->importerManager->createInstance($pluginValue, $configuration); + $this->store->set('plugininstance', $plugin_instance); } $form['pluginconfig'] = $plugin_instance->interactiveForm($parents, $form_state); $form['pluginconfig']['#tree'] = TRUE; } + // To keep this discrete and easier to edit maybe move to their own methods? if ($this->step == 3) { // We should never reach this point if data is not enough. 
Submit handler @@ -475,11 +481,11 @@ public function submitForm(array &$form, FormStateInterface $form_state) { if ($form_state->getValue('pluginconfig', [])) { $this->store->set('pluginconfig', $form_state->getValue('pluginconfig')); } + /* @var $plugin_instance \Drupal\ami\Plugin\ImporterAdapterInterface| NULL */ + $plugin_instance = $this->store->get('plugininstance'); // First data fetch step if ($this->step == 3) { $this->store->delete('data'); - /* @var $plugin_instance \Drupal\ami\Plugin\ImporterAdapterInterface| NULL */ - $plugin_instance = $this->store->get('plugininstance'); if ($plugin_instance) { $data = $plugin_instance->getInfo($this->store->get('pluginconfig'), $form_state,0,-1); // Check if the Plugin is ready processing or needs more data @@ -496,6 +502,7 @@ public function submitForm(array &$form, FormStateInterface $form_state) { // Total rows contains data without headers So a single one is good enough. if (is_array($data) && !empty($data) and isset($data['headers']) && ((count($data['headers']) >= 3) || (count($data['headers']) >= 2 && $op != 'create')) && isset($data['totalrows']) && $data['totalrows'] >= 1) { $this->store->set('data', $data); + $plugin_instance->alterStepStore($form_state, $this->store, $this->step); } else { // Not the data we are looking for? Back to Step 2. 
@@ -528,12 +535,18 @@ public function submitForm(array &$form, FormStateInterface $form_state) { ] ] ]); + if ($plugin_instance) { + $plugin_instance->alterStepStore($form_state, $this->store, $this->step); + } } } if ($this->step == 5) { if ($form_state->getTriggeringElement()['#name'] !== 'prev') { $adomapping = $form_state->getValue('adomapping'); $this->store->set('adomapping', $adomapping); + if ($plugin_instance) { + $plugin_instance->alterStepStore($form_state, $this->store, $this->step); + } } } if ($this->step == 6) { @@ -550,6 +563,9 @@ public function submitForm(array &$form, FormStateInterface $form_state) { } else { $this->store->set('zip', NULL); } + if ($plugin_instance) { + $plugin_instance->alterStepStore($form_state, $this->store, $this->step); + } $ami_set_label = $form_state->getValue('ami_set_label', NULL); $ami_set_label = $ami_set_label ? trim($ami_set_label) : $ami_set_label; $amisetdata = new \stdClass(); @@ -643,6 +659,9 @@ public function submitForm(array &$form, FormStateInterface $form_state) { $form_state->setRebuild(TRUE); } else { if (!empty($batch)) { + if ($plugin_instance) { + $plugin_instance->alterStepStore($form_state, $this->store, $this->step); + } batch_set($batch); } } diff --git a/src/Plugin/ImporterAdapter/EADImporter.php b/src/Plugin/ImporterAdapter/EADImporter.php index c8b668e..f894b8c 100644 --- a/src/Plugin/ImporterAdapter/EADImporter.php +++ b/src/Plugin/ImporterAdapter/EADImporter.php @@ -21,7 +21,7 @@ * id = "ead", * label = @Translation("EAD Importer"), * remote = false, - * batch = true, + * batch = false, * ) */ class EADImporter extends SpreadsheetImporter { @@ -154,6 +154,49 @@ public function stepFormAlter(&$form, FormStateInterface $form_state, PrivateTem } } } + if ($step == 4) { + $current_options = $form['ingestsetup']['adomapping']['parents']['#options']; + $current_options['iscontainedby'] = 'iscontainedby (used by CSV child container)'; + // We add this just in case the top level does not has it. 
+ $current_options['ispartof'] = 'ispartof'; + unset($current_options['node_uuid']); + if (empty($form['ingestsetup']['adomapping']['parents']['#default_value'])) { + $form['ingestsetup']['adomapping']['parents']['#default_value'] = ['ispartof', 'iscontainedby']; + } + $form['ingestsetup']['adomapping']['parents']['#options'] = $current_options; + $form['ingestsetup']['adomapping']['autouuid'] = [ + '#disabled' => TRUE, + '#default_value' => FALSE, + ]; + $form['ingestsetup']['adomapping']['uuid'] = [ + '#default_value' => 'node_uuid', + '#disabled' => TRUE, + ]; + foreach ($form['ingestsetup']['custommapping'] ?? [] as $key => &$settings) { + if (strpos($key,'#') !== 0 && is_array($settings)) { + if ($settings['metadata']['#default_value'] ?? NULL) { + $form['ingestsetup']['custommapping'][$key]['metadata']['#default_value'] = 'template'; + $form['ingestsetup']['custommapping'][$key]['metadata']['#options'] = ['template' => 'Template']; + } + } + } + } $form = $form; } + + /** + * @inheritDoc + */ + public function alterStepStore(FormStateInterface $form_state, PrivateTempStore $store, int $step = 1): void { + if ($step == 4) { + $mapping = $store->get('mapping'); + // We only set this for ArchiveContainer, that way we won't have nested of nested CSVs (means the container CSV + // won't have nested CSV again. We can. 
We won't + if (isset($mapping['custommapping_settings']['ArchiveContainer'])) { + $mapping['custommapping_settings']['ArchiveContainer']['files_csv'] = ['dsc_csv']; + // Will be used by \Drupal\ami\Plugin\QueueWorker\IngestADOQueueWorker::processItem + } + $store->set('mapping', $mapping); + } + } } diff --git a/src/Plugin/ImporterAdapterBase.php b/src/Plugin/ImporterAdapterBase.php index 77c852b..d857ede 100644 --- a/src/Plugin/ImporterAdapterBase.php +++ b/src/Plugin/ImporterAdapterBase.php @@ -160,22 +160,15 @@ public function provideTypes(array $config, array $data): array { return $alltypes; } - /** - * During a Multistep Ingest Form Setup we can alter any steps/generated data - * @see \Drupal\ami\Form\AmiMultiStepIngestBaseForm - * - * @param $step - * @param PrivateTempStore $store - * @return void - */ - public function alterStepStore($step, PrivateTempStore $store):void { - - } - public function stepFormAlter(&$form, FormStateInterface $form_state, PrivateTempStore $store, $step): void { // TODO: Implement stepFormAlter() method. } + public function alterStepStore(FormStateInterface $form_state, PrivateTempStore $store, int $step = 1): void + { + // TODO: Implement alterStepStore() method. 
+ } + } diff --git a/src/Plugin/ImporterAdapterInterface.php b/src/Plugin/ImporterAdapterInterface.php index 8213693..74b031e 100644 --- a/src/Plugin/ImporterAdapterInterface.php +++ b/src/Plugin/ImporterAdapterInterface.php @@ -148,14 +148,14 @@ public function provideKeys(array $config, array $data):array; public function provideTypes(array $config, array $data):array; /** - * During a Multistep Ingest Form Setup we can alter any steps/generated data + * During a Multistep Ingest Form Submit, we can alter any steps/generated data * - * @see \Drupal\ami\Form\AmiMultiStepIngestBaseForm - * - * @param $step + * @param FormStateInterface $form_state * @param PrivateTempStore $store + * @param int $step * @return void + * @see \Drupal\ami\Form\AmiMultiStepIngestBaseForm */ - public function alterStepStore($step, PrivateTempStore $store):void; + public function alterStepStore(FormStateInterface $form_state, PrivateTempStore $store, int $step = 1):void; } diff --git a/src/Plugin/QueueWorker/IngestADOQueueWorker.php b/src/Plugin/QueueWorker/IngestADOQueueWorker.php index a3b3f80..0fe7d9b 100644 --- a/src/Plugin/QueueWorker/IngestADOQueueWorker.php +++ b/src/Plugin/QueueWorker/IngestADOQueueWorker.php @@ -580,9 +580,7 @@ public function processItem($data) { // Only persist if we passed this. // True if all ok, to the best of our knowledge of course $persisted = $this->persistEntity($data, $processed_metadata); - // @TODO another question for @alliomera. If i don't check if the ROW was ingested.. we could process a Child CSV - // EVEN if the parent object was A) there already OR, was non even valid. That could be useful? - // A Processing setting like "don't process if file missing?" + // We only process a CSV column IF and only if the row that contains it generated an ADO. if ($persisted && !empty($file_csv_columns)) { $current_uuid = $data->info['row']['uuid'] ?? NULL; $current_row_id = $data->info['row']['row_id'] ?? 
NULL; From 454cb9c5f08b962374ed2c15af4d5babedd5a1eb Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Thu, 9 May 2024 21:41:14 -0400 Subject: [PATCH 25/29] So many tiny mistakes @alliomeria this has not been my best week. Testing now again nested ingests. Sorry :( --- src/Plugin/ImporterAdapter/EADImporter.php | 8 +++++--- src/Plugin/QueueWorker/CsvADOQueueWorker.php | 4 +++- src/Plugin/QueueWorker/IngestADOQueueWorker.php | 2 +- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/Plugin/ImporterAdapter/EADImporter.php b/src/Plugin/ImporterAdapter/EADImporter.php index f894b8c..da1aab1 100644 --- a/src/Plugin/ImporterAdapter/EADImporter.php +++ b/src/Plugin/ImporterAdapter/EADImporter.php @@ -190,10 +190,12 @@ public function stepFormAlter(&$form, FormStateInterface $form_state, PrivateTem public function alterStepStore(FormStateInterface $form_state, PrivateTempStore $store, int $step = 1): void { if ($step == 4) { $mapping = $store->get('mapping'); - // We only set this for ArchiveContainer, that way we won't have nested of nested CSVs (means the container CSV + // We only set this for ArchiveComponent, that way we won't have nested of nested CSVs (means the container CSV // won't have nested CSV again. We can. We won't - if (isset($mapping['custommapping_settings']['ArchiveContainer'])) { - $mapping['custommapping_settings']['ArchiveContainer']['files_csv'] = ['dsc_csv']; + // Diego wake up! 
+ if (isset($mapping['custommapping_settings']['ArchiveComponent'])) { + // Needs to be a an associative array bc we convert into object afterwards and access the files_csv as property + $mapping['custommapping_settings']['ArchiveComponent']['files_csv'] = ['dsc_csv' => 'dsc_csv']; // Will be used by \Drupal\ami\Plugin\QueueWorker\IngestADOQueueWorker::processItem } $store->set('mapping', $mapping); diff --git a/src/Plugin/QueueWorker/CsvADOQueueWorker.php b/src/Plugin/QueueWorker/CsvADOQueueWorker.php index ceacf45..50a919e 100644 --- a/src/Plugin/QueueWorker/CsvADOQueueWorker.php +++ b/src/Plugin/QueueWorker/CsvADOQueueWorker.php @@ -88,6 +88,7 @@ public function processItem($data) { $info = $this->AmiUtilityService->preprocessAmiSet($data->info['csv_file'], $data, $invalid, FALSE); if (!count($info)) { + //@TODO tell the user which CSV failed please? $message = $this->t('So sorry. CSV for @setid produced no ADOs. Please correct your source CSV data', [ '@setid' => $data->info['set_id'] ]); @@ -149,9 +150,10 @@ public function processItem($data) { $processed_set_status['processed'] = $processed_set_status['processed'] ?? 0; $processed_set_status['errored'] = $processed_set_status['errored'] ?? 0; $processed_set_status['total'] = $processed_set_status['total'] ?? 0 + count($added); - $this->statusStore->set('set_' . $this->entity->id(), $processed_set_status); + $this->statusStore->set('set_' . $data->info['set_id'], $processed_set_status); return; } + // @TODO add a logger error saying it was enqueued as CSV but there was no CSV file to be found return; } } diff --git a/src/Plugin/QueueWorker/IngestADOQueueWorker.php b/src/Plugin/QueueWorker/IngestADOQueueWorker.php index 0fe7d9b..6b1604a 100644 --- a/src/Plugin/QueueWorker/IngestADOQueueWorker.php +++ b/src/Plugin/QueueWorker/IngestADOQueueWorker.php @@ -677,7 +677,7 @@ private function persistEntity(\stdClass $data, array $processed_metadata) { 'time_submitted' => $data->info['time_submitted'] ?? 
'', ]); $this->setStatus(amiSetEntity::STATUS_PROCESSING_WITH_ERRORS, $data); - return; + return FALSE; } $bundle = $property_path_split[0]; From ed65d9ab8d77693655e9c2b21b233ce39572bc94 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Thu, 9 May 2024 22:02:09 -0400 Subject: [PATCH 26/29] Finally. My distraction/spread-thin-levels are at a new high --- src/Plugin/QueueWorker/IngestADOQueueWorker.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Plugin/QueueWorker/IngestADOQueueWorker.php b/src/Plugin/QueueWorker/IngestADOQueueWorker.php index 6b1604a..08d82bc 100644 --- a/src/Plugin/QueueWorker/IngestADOQueueWorker.php +++ b/src/Plugin/QueueWorker/IngestADOQueueWorker.php @@ -1079,7 +1079,7 @@ protected function processCSvFile($data): \Drupal\Core\Entity\EntityInterface|\D $zip_file_id = is_object($data->info['zip_file']) && $data->info['zip_file'] instanceof FileInterface ? (string) $data->info['zip_file']->id() : '0'; $file = $this->AmiUtilityService->file_get(trim($data->info['csv_filename']), $data->info['zip_file'], TRUE); - if ($file && $file->getMimeType() == 'application/csv') { + if ($file && $file->getMimeType() == 'text/csv') { return $file; } else { From 72164328d0b6608c746f64de68b176759312c7f2 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Thu, 9 May 2024 22:30:19 -0400 Subject: [PATCH 27/29] Adds composting for nested extracted CSV or we will end with 500 Mbytes of stuff that is unneeded on every batch run --- .../QueueWorker/IngestADOQueueWorker.php | 60 +++++++++++++++---- 1 file changed, 47 insertions(+), 13 deletions(-) diff --git a/src/Plugin/QueueWorker/IngestADOQueueWorker.php b/src/Plugin/QueueWorker/IngestADOQueueWorker.php index 08d82bc..8ef8a91 100644 --- a/src/Plugin/QueueWorker/IngestADOQueueWorker.php +++ b/src/Plugin/QueueWorker/IngestADOQueueWorker.php @@ -5,6 +5,7 @@ use Drupal\ami\AmiLoDService; use Drupal\ami\AmiUtilityService; use Drupal\ami\Entity\amiSetEntity; +use 
Drupal\Core\Datetime\DrupalDateTime; use Drupal\file\FileInterface; use Drupal\Core\Entity\EntityTypeManagerInterface; use Drupal\Core\Logger\LoggerChannelFactoryInterface; @@ -12,6 +13,8 @@ use Drupal\Core\Plugin\ContainerFactoryPluginInterface; use Drupal\Core\Queue\QueueWorkerBase; use Drupal\Core\StringTranslation\StringTranslationTrait; +use Drupal\strawberryfield\Event\StrawberryfieldFileEvent; +use Drupal\strawberryfield\StrawberryfieldEventType; use Drupal\strawberryfield\StrawberryfieldFilePersisterService; use Drupal\strawberryfield\StrawberryfieldUtilityService; use Monolog\Handler\StreamHandler; @@ -21,6 +24,7 @@ use Swaggest\JsonDiff\JsonDiff; use Drupal\strawberryfield\Tools\StrawberryfieldJsonHelper; use \Drupal\Core\TempStore\PrivateTempStoreFactory; +use Symfony\Component\EventDispatcher\EventDispatcherInterface; /** * Processes and Ingests each AMI Set CSV row. @@ -109,20 +113,28 @@ class IngestADOQueueWorker extends QueueWorkerBase implements ContainerFactoryPl 'patch' => 'patched', ]; + /** + * The event dispatcher. + * + * @var EventDispatcherInterface + */ + protected EventDispatcherInterface $eventDispatcher; + /** * Constructor. 
* * @param array $configuration * @param string $plugin_id * @param mixed $plugin_definition - * @param \Drupal\Core\Entity\EntityTypeManagerInterface $entity_type_manager - * @param \Drupal\Core\Logger\LoggerChannelFactoryInterface $logger_factory - * @param \Drupal\strawberryfield\StrawberryfieldUtilityService $strawberryfield_utility_service - * @param \Drupal\ami\AmiUtilityService $ami_utility - * @param \Drupal\ami\AmiLoDService $ami_lod - * @param \Drupal\Core\Messenger\MessengerInterface $messenger - * @param \Drupal\strawberryfield\StrawberryfieldFilePersisterService $strawberry_filepersister - * @param \Drupal\Core\TempStore\PrivateTempStoreFactory $temp_store_factory + * @param EntityTypeManagerInterface $entity_type_manager + * @param LoggerChannelFactoryInterface $logger_factory + * @param StrawberryfieldUtilityService $strawberryfield_utility_service + * @param AmiUtilityService $ami_utility + * @param AmiLoDService $ami_lod + * @param MessengerInterface $messenger + * @param StrawberryfieldFilePersisterService $strawberry_filepersister + * @param PrivateTempStoreFactory $temp_store_factory + * @param EventDispatcherInterface $event_dispatcher */ public function __construct( array $configuration, @@ -135,7 +147,8 @@ public function __construct( AmiLoDService $ami_lod, MessengerInterface $messenger, StrawberryfieldFilePersisterService $strawberry_filepersister, - PrivateTempStoreFactory $temp_store_factory + PrivateTempStoreFactory $temp_store_factory, + EventDispatcherInterface $event_dispatcher ) { parent::__construct($configuration, $plugin_id, $plugin_definition); $this->entityTypeManager = $entity_type_manager; @@ -147,6 +160,7 @@ public function __construct( $this->strawberryfilepersister = $strawberry_filepersister; $this->store = $temp_store_factory->get('ami_queue_worker_file'); $this->statusStore = $temp_store_factory->get('ami_queue_status'); + $this->eventDispatcher = $event_dispatcher; } /** @@ -176,7 +190,8 @@ public static function 
create( $container->get('ami.lod'), $container->get('messenger'), $container->get('strawberryfield.file_persister'), - $container->get('tempstore.private') + $container->get('tempstore.private'), + $container->get('event_dispatcher') ); } @@ -556,7 +571,7 @@ public function processItem($data) { } if ($process_files_via_queue && empty($data->info['waiting_for_files'])) { - // If so we need to push this one to the end.. + // If so we need to push this one to the end. // Reset the attempts $data->info['waiting_for_files'] = TRUE; $data->info['attempt'] = $data->info['attempt'] ? $data->info['attempt'] + 1 : 0; @@ -594,6 +609,8 @@ public function processItem($data) { return trim($value); }, explode(';', $filenames)); $filenames = array_filter($filenames); + // We will keep the original row ID, so we can log it. + $data_csv->info['row']['row_id'] = $data->info['row']['row_id']; foreach($filenames as $filename) { $data_csv->info['csv_filename'] = $filename; $csv_file = $this->processCSvFile($data_csv); @@ -1076,13 +1093,30 @@ protected function processCSvFile($data): \Drupal\Core\Entity\EntityInterface|\D if (!($data->info['csv_filename'] ?? NULL)) { return NULL; } - $zip_file_id = is_object($data->info['zip_file']) && $data->info['zip_file'] instanceof FileInterface ? (string) $data->info['zip_file']->id() : '0'; $file = $this->AmiUtilityService->file_get(trim($data->info['csv_filename']), - $data->info['zip_file'], TRUE); + $data->info['zip_file'] ?? NULL, TRUE); + if ($file) { + $event_type = StrawberryfieldEventType::TEMP_FILE_CREATION; + $current_timestamp = (new DrupalDateTime())->getTimestamp(); + $event = new StrawberryfieldFileEvent($event_type, 'ami', $file->getFileUri(), $current_timestamp); + // This will allow the extracted CSV from the zip to be composted, even if it was not a CSV. 
+ // IN a queue by \Drupal\strawberryfield\EventSubscriber\StrawberryfieldEventCompostBinSubscriber + $this->eventDispatcher->dispatch($event, $event_type); + } if ($file && $file->getMimeType() == 'text/csv') { return $file; } else { + $message = $this->t('The referenced nested CSV @filename on row id @rowid from Set @setid could not be found or had the wrong format. Skipping', + [ + '@setid' => $data->info['set_id'], + '@filename' => $data->info['csv_filename'], + '@rowid' => $data->info['row']['row_id'] ?? '0', + ]); + $this->loggerFactory->get('ami_file')->warning($message ,[ + 'setid' => $data->info['set_id'] ?? NULL, + 'time_submitted' => $data->info['time_submitted'] ?? '', + ]); return NULL; } } From 010eb4e70a64d18fbae3cff3146741899846eb39 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Thu, 9 May 2024 23:53:13 -0400 Subject: [PATCH 28/29] Cleaner more checks --- src/Plugin/QueueWorker/CsvADOQueueWorker.php | 18 +++++++--- .../QueueWorker/IngestADOQueueWorker.php | 33 ++++++++++--------- 2 files changed, 30 insertions(+), 21 deletions(-) diff --git a/src/Plugin/QueueWorker/CsvADOQueueWorker.php b/src/Plugin/QueueWorker/CsvADOQueueWorker.php index 50a919e..463aa96 100644 --- a/src/Plugin/QueueWorker/CsvADOQueueWorker.php +++ b/src/Plugin/QueueWorker/CsvADOQueueWorker.php @@ -79,8 +79,10 @@ public function processItem($data) { $added = []; // @TODO discuss with Allison the idea that one could ingest with "AMI set" data but without an actual AMI set? // That would require, e.g generating a fake $data->info['set_id'] - if (!empty($data->info['csv_file'])) { + $csv_file = $data->info['csv_file'] ?? NULL; + if ($csv_file instanceof FileInterface) { $invalid = []; + // Note. We won't process the nested CSV here. This queue worker only takes a CSV and splits into smaller // chunks. Basically what the \Drupal\ami\Form\amiSetEntityProcessForm::submitForm already does. 
// But the ADO worker itself will (new code) extract a CSV and then again, enqueue back to this so this one can yet again @@ -89,8 +91,9 @@ public function processItem($data) { if (!count($info)) { //@TODO tell the user which CSV failed please? - $message = $this->t('So sorry. CSV for @setid produced no ADOs. Please correct your source CSV data', [ - '@setid' => $data->info['set_id'] + $message = $this->t('So sorry. CSV @csv for @setid produced no ADOs. Please correct your source CSV data', [ + '@setid' => $data->info['set_id'], + '@csv' => $csv_file->getFilename(), ]); $this->loggerFactory->get('ami_file')->warning($message, [ 'setid' => $data->info['set_id'] ?? NULL, @@ -125,8 +128,10 @@ public function processItem($data) { ->createItem($adodata); } if (count($added)) { - $message = $this->t('CSV for Set @setid was expanded to ADOs', [ - '@setid' => $data->info['set_id'] + $message = $this->t('CSV @csv for Set @setid was expanded to @count ADOs', [ + '@setid' => $data->info['set_id'], + '@csv' => $csv_file->getFilename(), + '@count' => count($added), ]); $this->loggerFactory->get('ami_file')->info($message, [ 'setid' => $data->info['set_id'] ?? NULL, @@ -153,6 +158,9 @@ public function processItem($data) { $this->statusStore->set('set_' . 
$data->info['set_id'], $processed_set_status); return; } + else { + error_log('wrongly enqueued'); + } // @TODO add a logger error saying it was enqueued as CSV but there was no CSV file to be found return; } diff --git a/src/Plugin/QueueWorker/IngestADOQueueWorker.php b/src/Plugin/QueueWorker/IngestADOQueueWorker.php index 8ef8a91..37ebfc9 100644 --- a/src/Plugin/QueueWorker/IngestADOQueueWorker.php +++ b/src/Plugin/QueueWorker/IngestADOQueueWorker.php @@ -138,8 +138,8 @@ class IngestADOQueueWorker extends QueueWorkerBase implements ContainerFactoryPl */ public function __construct( array $configuration, - $plugin_id, - $plugin_definition, + $plugin_id, + $plugin_definition, EntityTypeManagerInterface $entity_type_manager, LoggerChannelFactoryInterface $logger_factory, StrawberryfieldUtilityService $strawberryfield_utility_service, @@ -176,8 +176,8 @@ public function __construct( public static function create( ContainerInterface $container, array $configuration, - $plugin_id, - $plugin_definition + $plugin_id, + $plugin_definition ) { return new static( empty($configuration) ? [] : $configuration, @@ -610,20 +610,21 @@ public function processItem($data) { }, explode(';', $filenames)); $filenames = array_filter($filenames); // We will keep the original row ID, so we can log it. 
- $data_csv->info['row']['row_id'] = $data->info['row']['row_id']; + $data_csv->info['row']['row_id'] = $current_row_id; foreach($filenames as $filename) { - $data_csv->info['csv_filename'] = $filename; - $csv_file = $this->processCSvFile($data_csv); - if ($csv_file) { - $data_csv->info['csv_file'] = $csv_file; - // Push to the CSV queue - \Drupal::queue('ami_csv_ado') - ->createItem($data_csv); - } + $data_csv->info['csv_filename'] = $filename; + $csv_file = $this->processCSvFile($data_csv); + if ($csv_file) { + $data_csv->info['csv_file'] = $csv_file; + // Push to the CSV queue + \Drupal::queue('ami_csv_ado') + ->createItem($data_csv); + } } } } } + return; } @@ -1094,7 +1095,7 @@ protected function processCSvFile($data): \Drupal\Core\Entity\EntityInterface|\D return NULL; } $file = $this->AmiUtilityService->file_get(trim($data->info['csv_filename']), - $data->info['zip_file'] ?? NULL, TRUE); + $data->info['zip_file'] ?? NULL, TRUE); if ($file) { $event_type = StrawberryfieldEventType::TEMP_FILE_CREATION; $current_timestamp = (new DrupalDateTime())->getTimestamp(); @@ -1104,7 +1105,7 @@ protected function processCSvFile($data): \Drupal\Core\Entity\EntityInterface|\D $this->eventDispatcher->dispatch($event, $event_type); } if ($file && $file->getMimeType() == 'text/csv') { - return $file; + return $file; } else { $message = $this->t('The referenced nested CSV @filename on row id @rowid from Set @setid could not be found or had the wrong format. Skipping', @@ -1117,7 +1118,7 @@ protected function processCSvFile($data): \Drupal\Core\Entity\EntityInterface|\D 'setid' => $data->info['set_id'] ?? NULL, 'time_submitted' => $data->info['time_submitted'] ?? '', ]); - return NULL; + return NULL; } } From dc070b199f3dba1d06b336184e819f2389f8adb3 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Fri, 10 May 2024 16:14:26 -0400 Subject: [PATCH 29/29] Deals with ISSUE-200 See #200 or https://github.com/esmero/ami/issues/200 Note. 
I think we should also on replace/append and even on full replace log the JSON diff the way we do via VBO replace. @alliomeria thoughts? --- .../QueueWorker/IngestADOQueueWorker.php | 30 +++++++++++++------ 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/src/Plugin/QueueWorker/IngestADOQueueWorker.php b/src/Plugin/QueueWorker/IngestADOQueueWorker.php index 37ebfc9..c7c6d92 100644 --- a/src/Plugin/QueueWorker/IngestADOQueueWorker.php +++ b/src/Plugin/QueueWorker/IngestADOQueueWorker.php @@ -709,12 +709,15 @@ private function persistEntity(\stdClass $data, array $processed_metadata) { $status = $data->info['status'][$bundle] ?? 0; // default Sortfile which will respect the ingest order. If there was already one set, preserve. $sort_files = isset($processed_metadata['ap:tasks']) && isset($processed_metadata['ap:tasks']['ap:sortfiles']) ? $processed_metadata['ap:tasks']['ap:sortfiles'] : 'index'; - if (isset($processed_metadata['ap:tasks']) && is_array($processed_metadata['ap:tasks'])) { - $processed_metadata['ap:tasks']['ap:sortfiles'] = $sort_files; - } - else { - $processed_metadata['ap:tasks'] = []; - $processed_metadata['ap:tasks']['ap:sortfiles'] = $sort_files; + // We can't blindly override ap:tasks if we are dealing with an update operation. So make an exception here for only create + // And deal with the same for update but later + if ($op ==='create') { + if (isset($processed_metadata['ap:tasks']) && is_array($processed_metadata['ap:tasks'])) { + $processed_metadata['ap:tasks']['ap:sortfiles'] = $sort_files; + } else { + $processed_metadata['ap:tasks'] = []; + $processed_metadata['ap:tasks']['ap:sortfiles'] = $sort_files; + } } // JSON_ENCODE AGAIN! @@ -827,7 +830,6 @@ private function persistEntity(\stdClass $data, array $processed_metadata) { $processed_metadata = $original_value; } - if (isset($data->info['log_jsonpatch']) && $data->info['log_jsonpatch']) { $this->patchJson($original_value ?? [], $processed_metadata ?? 
[], true); } @@ -891,8 +893,18 @@ function str_starts_with($haystack, $needle) { } $processed_metadata = $original_value; } - // @TODO. Log this? - // $this->patchJson($original_value, $processed_metadata); + + // Now deal with ap:tasks again, in case the replace/append operation stripped the basics out. + + if (isset($processed_metadata['ap:tasks']) && is_array($processed_metadata['ap:tasks'])) { + // Basically reuse what is there (which at this stage will be a mix of original and new data), or default to the $sort_files value defined before. + $processed_metadata['ap:tasks']['ap:sortfiles'] = $processed_metadata['ap:tasks']['ap:sortfiles'] ?? $sort_files; + } else { + // Only set it from scratch if, after all of the above, it was removed entirely. + $processed_metadata['ap:tasks'] = []; + $processed_metadata['ap:tasks']['ap:sortfiles'] = $sort_files; + } + $itemfield->setMainValueFromArray($processed_metadata); break;