diff --git a/src/Form/AmiMultiStepIngest.php b/src/Form/AmiMultiStepIngest.php index 1a2cb35..2fda14b 100644 --- a/src/Form/AmiMultiStepIngest.php +++ b/src/Form/AmiMultiStepIngest.php @@ -74,12 +74,15 @@ public function __construct(PrivateTempStoreFactory $temp_store_factory, Session */ public function buildForm(array $form, FormStateInterface $form_state) { $form = parent::buildForm($form, $form_state); + $form['message-step'] = [ - '#markup' => '
' . $this->t('AMI step @step of @laststep',[ + '#markup' => '
' . $this->t('AMI step @step of @laststep', [ '@step' => $this->step, '@laststep' => $this->lastStep, ]) . '
', ]; + $plugin_instance = NULL; + $pluginValue = NULL; if ($this->step == 1) { $pluginValue = $this->store->get('plugin'); $definitions = $this->importerManager->getDefinitions(); @@ -87,6 +90,8 @@ public function buildForm(array $form, FormStateInterface $form_state) { foreach ($definitions as $id => $definition) { $options[$id] = $definition['label']; } + // Reset the plugin instance in case of code change. + $this->store->set('plugininstance', NULL); $form['plugin'] = [ '#type' => 'select', @@ -98,29 +103,32 @@ public function buildForm(array $form, FormStateInterface $form_state) { '#empty_option' => $this->t('- Please select a plugin -'), ]; } + if ($this->step > 1) { + //Only carry the plugin instance + /* @var $plugin_instance \Drupal\ami\Plugin\ImporterAdapterInterface | NULL */ + $plugin_instance = $this->store->get('plugininstance'); + $pluginValue = $this->store->get('plugin'); + } if ($this->step == 2) { $parents = ['pluginconfig']; $form_state->setValue('pluginconfig', $this->store->get('pluginconfig')); - $pluginValue = $this->store->get('plugin'); // Only create a new instance if we do not have the PluginInstace around - /* @var $plugin_instance \Drupal\ami\Plugin\ImporterAdapterInterface | NULL */ - $plugin_instance = $this->store->get('plugininstance'); if (!$plugin_instance || $plugin_instance->getPluginid() != $pluginValue || $pluginValue == NULL) { $configuration = []; $configuration['config'] = ImporterAdapter::create(); - $plugin_instance = $this->importerManager->createInstance($pluginValue,$configuration); - $this->store->set('plugininstance',$plugin_instance); + $plugin_instance = $this->importerManager->createInstance($pluginValue, $configuration); + $this->store->set('plugininstance', $plugin_instance); } $form['pluginconfig'] = $plugin_instance->interactiveForm($parents, $form_state); $form['pluginconfig']['#tree'] = TRUE; } - // TO keep this discrete and easier to edit maybe move to it's own method? 
+ + // To keep this discrete and easier to edit maybe move to their own methods? if ($this->step == 3) { // We should never reach this point if data is not enough. Submit handler // will go back to Step 2 if so. $data = $this->store->get('data') ?? []; $pluginconfig = $this->store->get('pluginconfig'); - $plugin_instance = $this->store->get('plugininstance'); $op = $pluginconfig['op']; $column_keys = $plugin_instance->provideKeys($pluginconfig, $data); $mapping = $this->store->get('mapping'); @@ -129,13 +137,17 @@ public function buildForm(array $form, FormStateInterface $form_state) { 'template' => 'Template', ]; $template = $this->getMetadatadisplays(); - // $webform = $this->getWebforms(); $bundle = $this->getBundlesAndFields(); $global_metadata_options = $metadata + ['custom' => 'Custom (Expert Mode)']; //Each row (based on its type column) can have its own approach setup(expert mode) $element_conditional = []; $element = []; + // Get all headers and check for a 'type' key first, if not allow the user to select one? + // Wonder if we can be strict about this and simply require always a "type"? + // @TODO WE need to check for 'type' always. Maybe even in the submit handler? 
+ $alltypes = $plugin_instance->provideTypes($pluginconfig, $data); + $element['bundle'] =[ '#type' => 'select', '#title' => $this->t('Fields and Bundles'), @@ -150,15 +162,6 @@ public function buildForm(array $form, FormStateInterface $form_state) { '#description' => $this->t('Columns will be casted to ADO metadata (JSON) using a Twig template setup for JSON output'), ]; - /** - * $element_conditional['webform'] = [ - * '#type' => 'select', - * '#title' => $this->t('Webform'), - * '#options' => $webform, - * '#description' => $this->t('Columns are casted to ADO metadata (JSON) by passing/validating Data through an existing Webform'), - * ]; - */ - $form['ingestsetup']['globalmapping'] = [ '#type' => 'select', '#title' => $this->t('Select the data transformation approach'), @@ -202,10 +205,6 @@ public function buildForm(array $form, FormStateInterface $form_state) { ], ]; - // Get all headers and check for a 'type' key first, if not allow the user to select one? - // Wonder if we can be strict about this and simply require always a "type"? - // @TODO WE need to check for 'type' always. Maybe even in the submit handler? - $alltypes = $plugin_instance->provideTypes($pluginconfig, $data); if (!empty($alltypes)) { $form['ingestsetup']['custommapping'] = [ '#type' => 'fieldset', @@ -296,7 +295,6 @@ public function buildForm(array $form, FormStateInterface $form_state) { $data = $this->store->get('data') ?? []; $pluginconfig = $this->store->get('pluginconfig'); $op = $pluginconfig['op']; - $plugin_instance = $this->store->get('plugininstance'); $column_keys = $plugin_instance->provideKeys($pluginconfig, $data); $column_options = array_combine($column_keys, $column_keys); $mapping = $this->store->get('mapping'); @@ -317,8 +315,6 @@ public function buildForm(array $form, FormStateInterface $form_state) { $node_description = $this->t('Columns that hold either other row numbers or UUIDs(an existing ADO) connecting ADOs between each other (e.g "ismemberof"). 
You can choose multiple.'); } - - $form['ingestsetup']['adomapping']['parents'] = [ '#type' => 'select', '#title' => $this->t('ADO Parent Columns'), @@ -429,6 +425,10 @@ public function buildForm(array $form, FormStateInterface $form_state) { '#default_value' => 'AMI Set of ' . $this->currentUser()->getDisplayName() ]; } + // Allow the plugin to alter the forms if needed in any way + if ($plugin_instance && $this->step > 1) { + $plugin_instance->stepFormAlter($form, $form_state, $this->store, $this->step); + } return $form; } @@ -473,28 +473,26 @@ public function validateMapping(array &$form, FormStateInterface $form_state) { public function submitForm(array &$form, FormStateInterface $form_state) { parent::submitForm($form, $form_state); if ($form_state->getValue('plugin', NULL)) { - if ($this->store->get('plugin') != $form_state->getValue('plugin', NULL)) { $this->store->set('pluginconfig',[]); } $this->store->set('plugin', $form_state->getValue('plugin')); - } if ($form_state->getValue('pluginconfig', [])) { $this->store->set('pluginconfig', $form_state->getValue('pluginconfig')); } + /* @var $plugin_instance \Drupal\ami\Plugin\ImporterAdapterInterface| NULL */ + $plugin_instance = $this->store->get('plugininstance'); // First data fetch step if ($this->step == 3) { $this->store->delete('data'); - /* @var $plugin_instance \Drupal\ami\Plugin\ImporterAdapterInterface| NULL */ - $plugin_instance = $this->store->get('plugininstance'); if ($plugin_instance) { $data = $plugin_instance->getInfo($this->store->get('pluginconfig'), $form_state,0,-1); // Check if the Plugin is ready processing or needs more data $ready = $form_state->getValue('pluginconfig')['ready'] ?? TRUE; $op = $form_state->getValue('pluginconfig')['op'] ?? 'create'; if (!$ready) { - // Back yo Step 2 until the Plugin is ready doing its thing. + // Back to Step 2 until the Plugin is ready doing its thing. 
$this->step = 2; $form_state->setRebuild(); } @@ -504,6 +502,7 @@ public function submitForm(array &$form, FormStateInterface $form_state) { // Total rows contains data without headers So a single one is good enough. if (is_array($data) && !empty($data) and isset($data['headers']) && ((count($data['headers']) >= 3) || (count($data['headers']) >= 2 && $op != 'create')) && isset($data['totalrows']) && $data['totalrows'] >= 1) { $this->store->set('data', $data); + $plugin_instance->alterStepStore($form_state, $this->store, $this->step); } else { // Not the data we are looking for? Back to Step 2. @@ -536,12 +535,18 @@ public function submitForm(array &$form, FormStateInterface $form_state) { ] ] ]); + if ($plugin_instance) { + $plugin_instance->alterStepStore($form_state, $this->store, $this->step); + } } } if ($this->step == 5) { if ($form_state->getTriggeringElement()['#name'] !== 'prev') { $adomapping = $form_state->getValue('adomapping'); $this->store->set('adomapping', $adomapping); + if ($plugin_instance) { + $plugin_instance->alterStepStore($form_state, $this->store, $this->step); + } } } if ($this->step == 6) { @@ -558,6 +563,9 @@ public function submitForm(array &$form, FormStateInterface $form_state) { } else { $this->store->set('zip', NULL); } + if ($plugin_instance) { + $plugin_instance->alterStepStore($form_state, $this->store, $this->step); + } $ami_set_label = $form_state->getValue('ami_set_label', NULL); $ami_set_label = $ami_set_label ? 
trim($ami_set_label) : $ami_set_label; $amisetdata = new \stdClass(); @@ -568,8 +576,6 @@ public function submitForm(array &$form, FormStateInterface $form_state) { $amisetdata->adomapping = $this->store->get('adomapping'); $amisetdata->zip = $this->store->get('zip'); $amisetdata->name = $ami_set_label; - /* @var $plugin_instance \Drupal\ami\Plugin\ImporterAdapterInterface| NULL */ - $plugin_instance = $this->store->get('plugininstance'); if ($plugin_instance) { if (!$plugin_instance->getPluginDefinition()['batch']) { $data = $plugin_instance->getData($this->store->get('pluginconfig'), @@ -653,6 +659,9 @@ public function submitForm(array &$form, FormStateInterface $form_state) { $form_state->setRebuild(TRUE); } else { if (!empty($batch)) { + if ($plugin_instance) { + $plugin_instance->alterStepStore($form_state, $this->store, $this->step); + } batch_set($batch); } } diff --git a/src/Form/amiSetEntityProcessForm.php b/src/Form/amiSetEntityProcessForm.php index 3513fab..6e54085 100644 --- a/src/Form/amiSetEntityProcessForm.php +++ b/src/Form/amiSetEntityProcessForm.php @@ -115,31 +115,6 @@ public function submitForm(array &$form, FormStateInterface $form_state) { } if ($file && $data !== new \stdClass()) { $invalid = []; - $info = $this->AmiUtilityService->preprocessAmiSet($file, $data, $invalid, FALSE); - // Means preprocess set - if (count($invalid)) { - $invalid_message = $this->formatPlural(count($invalid), - 'Source data Row @row had an issue, common cause is an invalid parent.', - '@count rows, @row, had issues, common causes are invalid parents and/or non existing referenced rows.', - [ - '@row' => implode(', ', array_keys($invalid)), - ] - ); - $this->messenger()->addWarning($invalid_message); - } - if (!count($info)) { - $this->messenger()->addError( - $this->t( - 'So Sorry. Ami Set @label produced no ADOs. 
Please correct your source CSV data.', - [ - '@label' => $this->entity->label(), - ] - ) - ); - $form_state->setRebuild(); - return; - } - $SetURL = $this->entity->toUrl('canonical', ['absolute' => TRUE]) ->toString(); @@ -169,34 +144,87 @@ public function submitForm(array &$form, FormStateInterface $form_state) { $op_secondary = $form_state->getValue(['ops_secondary','ops_secondary_update'], 'update'); $ops_safefiles = $form_state->getValue(['ops_secondary','ops_safefiles'], TRUE); } - foreach ($info as $item) { - // We set current User here since we want to be sure the final owner of - // the object is this and not the user that runs the queue - $data->info = [ + if ($notprocessnow) { + $data_csv = clone $data; + // Testing the CSV processor + $data_csv->info = [ 'zip_file' => $zip_file, - 'row' => $item, + 'csv_file' => $file, 'set_id' => $this->entity->id(), 'uid' => $this->currentUser()->id(), 'status' => $statuses, 'op_secondary' => $op_secondary, - 'ops_safefiles' => $ops_safefiles ? TRUE: FALSE, + 'ops_safefiles' => $ops_safefiles ? 
TRUE : FALSE, 'log_jsonpatch' => FALSE, 'set_url' => $SetURL, 'attempt' => 1, 'queue_name' => $queue_name, - 'force_file_queue' => (bool) $form_state->getValue('force_file_queue', FALSE), - 'force_file_process' => (bool) $form_state->getValue('force_file_process', FALSE), + 'force_file_queue' => (bool)$form_state->getValue('force_file_queue', FALSE), + 'force_file_process' => (bool)$form_state->getValue('force_file_process', FALSE), 'manyfiles' => $manyfiles, 'ops_skip_onmissing_file' => $ops_skip_onmissing_file, 'ops_forcemanaged_destination_file' => $ops_forcemanaged_destination_file, 'time_submitted' => $run_timestamp ]; - $added[] = \Drupal::queue($queue_name) - ->createItem($data); + \Drupal::queue('ami_csv_ado') + ->createItem($data_csv); + } + else { + $info = $this->AmiUtilityService->preprocessAmiSet($file, $data, $invalid, FALSE); + // Means preprocess set + if (count($invalid)) { + $invalid_message = $this->formatPlural(count($invalid), + 'Source data Row @row had an issue, common cause is an invalid parent.', + '@count rows, @row, had issues, common causes are invalid parents and/or non existing referenced rows.', + [ + '@row' => implode(', ', array_keys($invalid)), + ] + ); + $this->messenger()->addWarning($invalid_message); + } + if (!count($info)) { + $this->messenger()->addError( + $this->t( + 'So Sorry. Ami Set @label produced no ADOs. Please correct your source CSV data.', + [ + '@label' => $this->entity->label(), + ] + ) + ); + $form_state->setRebuild(); + return; + } + + + foreach ($info as $item) { + // We set current User here since we want to be sure the final owner of + // the object is this and not the user that runs the queue + $data->info = [ + 'zip_file' => $zip_file, + 'row' => $item, + 'set_id' => $this->entity->id(), + 'uid' => $this->currentUser()->id(), + 'status' => $statuses, + 'op_secondary' => $op_secondary, + 'ops_safefiles' => $ops_safefiles ? 
TRUE : FALSE, + 'log_jsonpatch' => FALSE, + 'set_url' => $SetURL, + 'attempt' => 1, + 'queue_name' => $queue_name, + 'force_file_queue' => (bool)$form_state->getValue('force_file_queue', FALSE), + 'force_file_process' => (bool)$form_state->getValue('force_file_process', FALSE), + 'manyfiles' => $manyfiles, + 'ops_skip_onmissing_file' => $ops_skip_onmissing_file, + 'ops_forcemanaged_destination_file' => $ops_forcemanaged_destination_file, + 'time_submitted' => $run_timestamp + ]; + $added[] = \Drupal::queue($queue_name) + ->createItem($data); + } + $count = count(array_filter($added)); } - $count = count(array_filter($added)); - if ($notprocessnow && $count) { + if ($notprocessnow) { $this->messenger()->addMessage( $this->t( 'Set @label enqueued and processed .', @@ -208,7 +236,8 @@ public function submitForm(array &$form, FormStateInterface $form_state) { $processed_set_status['processed'] = 0; $processed_set_status['errored'] = 0; - $processed_set_status['total'] = $count; + $processed_set_status['total'] = 0; + // So far here, with the new CSV enqueue plugin we have no idea how many. But the CSV queue entry will fill up the gap $this->statusStore->set('set_' . 
$this->entity->id(), $processed_set_status); $this->entity->setStatus(amiSetEntity::STATUS_ENQUEUED); $this->entity->save(); diff --git a/src/Plugin/ImporterAdapter/EADImporter.php b/src/Plugin/ImporterAdapter/EADImporter.php new file mode 100644 index 0000000..da1aab1 --- /dev/null +++ b/src/Plugin/ImporterAdapter/EADImporter.php @@ -0,0 +1,204 @@ + 'managed_file', + '#default_value' => $form_state->getValue(array_merge($parents , ['file'])), + '#title' => $this->t('Upload your file'), + '#description' => $this->t('The CSV file containing your EAD records.'), + '#required' => TRUE, + '#upload_location' => 'private://', + '#upload_validators' => [ + 'file_validate_extensions' => ['csv'], + ], + ]; + + return $form; + } + + + /** + * {@inheritdoc} + */ + public function getData(array $config, $page = 0, $per_page = 20): array { + $data = parent::getData($config, $page, $per_page); + $offset = $page * $per_page; + $tabdata = ['headers' => [], 'data' => $data, 'totalrows' => 0]; + + /* @var File $file */ + $file = $this->entityTypeManager->getStorage('file') + ->load($config['file'][0]); + if (!$file) { + $this->messenger()->addMessage( + $this->t( + 'Could not load the file. Please check your Drupal logs or contact your Repository Admin' + ) + ); + return $tabdata; + } + + $response = $this->AmiUtilityService->ensureFileAvailability($file); + + if ($response === TRUE) { + $file_path = $this->streamWrapperManager->getViaUri($file->getFileUri())->realpath(); + $this->streamWrapperManager->getViaUri($file->getFileUri())->getUri(); + } + elseif ($response === FALSE) { + $this->messenger()->addMessage( + $this->t( + 'Could not copy source file to a local location. 
Please check your Filesystem Permissions, Drupal logs or contact your Repository Admin' + ) + ); + return $tabdata; + } + else { + $this->tempFile = $response; + $file_path = $response; + } + try { + $inputFileType = IOFactory::identify($file_path); + // Because of \PhpOffice\PhpSpreadsheet\Cell\DataType::checkString we can + // Not use this library for CSVs that contain large JSONs + // Since we do not know if they contain that, we will + // assume so (maybe a user choice in the future) + return $this->AmiUtilityService->csv_read($file, 0, 0, TRUE) ?? $tabdata; + } + catch (\Exception $e) { + $this->messenger()->addMessage( + $this->t( + 'Could not parse file with error: @error', + ['@error' => $e->getMessage()] + ) + ); + return $tabdata; + } + } + + public function getInfo(array $config, FormStateInterface $form_state, $page = 0, $per_page = 20): array { + return $this->getData($config, $page, $per_page); + } + + public function provideTypes(array $config, array $data): array + { + // These are our discussed types. No flexibility here. + return [ + 'ArchiveContainer' => 'ArchiveContainer', + 'ArchiveComponent' => 'ArchiveComponent', + ]; + } + + /** + * Shutdown that "should" clean temp file if one was generated + */ + public function shutdown() { + // on PHP-FPM there will be never output of this one.. + if ($this->tempFile !== NULL) { + $this->AmiUtilityService->cleanUpTemp($this->tempFile); + } + } + + public function stepFormAlter(&$form, FormStateInterface $form_state, PrivateTempStore $store, $step): void + { + if ($step == 3) { + $form['ingestsetup']['globalmapping'] = [ + '#type' => 'select', + '#title' => $this->t('Select the data transformation approach'), + '#default_value' => 'custom', + '#options' => ['custom' => 'Custom (Expert Mode)'], + '#description' => $this->t('How your source data will be transformed into EADs Metadata.'), + '#required' => TRUE, + ]; + foreach ($form['ingestsetup']['custommapping'] ?? 
[] as $key => &$settings) { + if (strpos($key,'#') !== 0 && is_array($settings)) { + if ($settings['metadata']['#default_value'] ?? NULL) { + $form['ingestsetup']['custommapping'][$key]['metadata']['#default_value'] = 'template'; + $form['ingestsetup']['custommapping'][$key]['metadata']['#options'] = ['template' => 'Template']; + } + } + } + } + if ($step == 4) { + $current_options = $form['ingestsetup']['adomapping']['parents']['#options']; + $current_options['iscontainedby'] = 'iscontainedby (used by CSV child container)'; + // We add this just in case the top level does not has it. + $current_options['ispartof'] = 'ispartof'; + unset($current_options['node_uuid']); + if (empty($form['ingestsetup']['adomapping']['parents']['#default_value'])) { + $form['ingestsetup']['adomapping']['parents']['#default_value'] = ['ispartof', 'iscontainedby']; + } + $form['ingestsetup']['adomapping']['parents']['#options'] = $current_options; + $form['ingestsetup']['adomapping']['autouuid'] = [ + '#disabled' => TRUE, + '#default_value' => FALSE, + ]; + $form['ingestsetup']['adomapping']['uuid'] = [ + '#default_value' => 'node_uuid', + '#disabled' => TRUE, + ]; + foreach ($form['ingestsetup']['custommapping'] ?? [] as $key => &$settings) { + if (strpos($key,'#') !== 0 && is_array($settings)) { + if ($settings['metadata']['#default_value'] ?? NULL) { + $form['ingestsetup']['custommapping'][$key]['metadata']['#default_value'] = 'template'; + $form['ingestsetup']['custommapping'][$key]['metadata']['#options'] = ['template' => 'Template']; + } + } + } + } + $form = $form; + } + + /** + * @inheritDoc + */ + public function alterStepStore(FormStateInterface $form_state, PrivateTempStore $store, int $step = 1): void { + if ($step == 4) { + $mapping = $store->get('mapping'); + // We only set this for ArchiveComponent, that way we won't have nested of nested CSVs (means the container CSV + // won't have nested CSV again. We can. We won't + // Diego wake up! 
+ if (isset($mapping['custommapping_settings']['ArchiveComponent'])) { + // Needs to be a an associative array bc we convert into object afterwards and access the files_csv as property + $mapping['custommapping_settings']['ArchiveComponent']['files_csv'] = ['dsc_csv' => 'dsc_csv']; + // Will be used by \Drupal\ami\Plugin\QueueWorker\IngestADOQueueWorker::processItem + } + $store->set('mapping', $mapping); + } + } +} diff --git a/src/Plugin/ImporterAdapter/GoogleSheetImporter.php b/src/Plugin/ImporterAdapter/GoogleSheetImporter.php index d7f4229..f8627f6 100644 --- a/src/Plugin/ImporterAdapter/GoogleSheetImporter.php +++ b/src/Plugin/ImporterAdapter/GoogleSheetImporter.php @@ -195,7 +195,6 @@ public function getData(array $config, $page = 0, $per_page = 20):array { MessengerInterface::TYPE_ERROR ); return $tabdata; - } } catch (Google_Service_Exception $e) { $this->messenger()->addMessage( diff --git a/src/Plugin/ImporterAdapter/SolrImporter.php b/src/Plugin/ImporterAdapter/SolrImporter.php index d062994..1ae254f 100644 --- a/src/Plugin/ImporterAdapter/SolrImporter.php +++ b/src/Plugin/ImporterAdapter/SolrImporter.php @@ -66,7 +66,7 @@ class SolrImporter extends SpreadsheetImporter { protected $httpClient; /** - * GoogleSheetImporter constructor. + * Solr Importer constructor. 
* * @param array $configuration * @param $plugin_id @@ -489,8 +489,6 @@ public function getInfo(array $config, FormStateInterface $form_state, $page = 0 } $filename_columns = array_unique(array_merge(static::FILE_COLUMNS,$filename_columns)); - - $solr_config = [ 'endpoint' => [ 'amiremote' => [ diff --git a/src/Plugin/ImporterAdapterBase.php b/src/Plugin/ImporterAdapterBase.php index c8c64a8..d857ede 100644 --- a/src/Plugin/ImporterAdapterBase.php +++ b/src/Plugin/ImporterAdapterBase.php @@ -13,6 +13,7 @@ use Drupal\Core\StringTranslation\StringTranslationTrait; use Drupal\ami\Entity\ImporterAdapterInterface; use Drupal\ami\Plugin\ImporterAdapterInterface as ImporterPluginAdapterInterface ; +use Drupal\Core\TempStore\PrivateTempStore; use Symfony\Component\DependencyInjection\ContainerInterface; use Drupal\file\Entity\File; @@ -159,5 +160,15 @@ public function provideTypes(array $config, array $data): array { return $alltypes; } + public function stepFormAlter(&$form, FormStateInterface $form_state, PrivateTempStore $store, $step): void + { + // TODO: Implement stepFormAlter() method. + } + + public function alterStepStore(FormStateInterface $form_state, PrivateTempStore $store, int $step = 1): void + { + // TODO: Implement alterStepStore() method. 
+ } + } diff --git a/src/Plugin/ImporterAdapterInterface.php b/src/Plugin/ImporterAdapterInterface.php index 815fba6..74b031e 100644 --- a/src/Plugin/ImporterAdapterInterface.php +++ b/src/Plugin/ImporterAdapterInterface.php @@ -5,6 +5,7 @@ use Drupal\ami\Plugin\ImporterAdapterInterface as ImporterPluginAdapterInterface; use Drupal\Component\Plugin\PluginInspectionInterface; use Drupal\Core\Form\FormStateInterface; +use Drupal\Core\TempStore\PrivateTempStore; use Drupal\file\Entity\File; /** @@ -51,6 +52,22 @@ public function settingsForm(array $parents, FormStateInterface $form_state): ar public function interactiveForm(array $parents, FormStateInterface $form_state): array; + /** + * Allows the Step form to be altered by reference. + * + * @param $form + * @param FormStateInterface $form_state + * @param PrivateTempStore $store + * @param int $step + * @return array + * @see \Drupal\ami\Form\AmiMultiStepIngestBaseForm + * Each plugin is responsible for providing a Form step that is compatible with the + * AmiMultiStepIngestBaseForm + * + */ + public function stepFormAlter(&$form, FormStateInterface $form_state, PrivateTempStore $store, $step):void; + + /** * Get Data from the source * @@ -130,4 +147,15 @@ public function provideKeys(array $config, array $data):array; */ public function provideTypes(array $config, array $data):array; + /** + * During a Multistep Ingest Form Submit, we can alter any steps/generated data + * + * @param FormStateInterface $form_state + * @param PrivateTempStore $store + * @param int $step + * @return void + * @see \Drupal\ami\Form\AmiMultiStepIngestBaseForm + */ + public function alterStepStore(FormStateInterface $form_state, PrivateTempStore $store, int $step = 1):void; + } diff --git a/src/Plugin/QueueWorker/CsvADOQueueWorker.php b/src/Plugin/QueueWorker/CsvADOQueueWorker.php new file mode 100644 index 0000000..463aa96 --- /dev/null +++ b/src/Plugin/QueueWorker/CsvADOQueueWorker.php @@ -0,0 +1,167 @@ 
+getViaUri('private://')->getDirectoryPath(); + $handler = new StreamHandler($private_path . '/ami/logs/set' . $data->info['set_id'] . '.log', Logger::DEBUG); + $handler->setFormatter(new JsonFormatter()); + $log->pushHandler($handler); + // This will add the File logger not replace the DB + // We can not use addLogger because in a single PHP process multiple Queue items might be invoked + // And loggers are additive. Means i can end with a few duplicated entries! + // @TODO: i should inject this into the Containers but i wanted to keep + // it simple for now. + $this->loggerFactory->get('ami_file')->setLoggers([[$log]]); + + /* Data info for an CSV has this structure + $data->info = [ + 'csv_file' => The CSV File that will (or we hope so if well formed) generate multiple ADO Queue items + 'csv_file_name' => Only present if this is called not from the root + 'set_id' => The Set id + 'uid' => The User ID that processed the Set + 'set_url' => A direct URL to the set. + 'status' => Either a string (moderation state) or a 1/0 for published/unpublished if not moderated + 'op_secondary' => applies only to Update/Patch operations. Can be one of 'update','replace','append' + 'ops_safefiles' => Boolean, True if we will not allow files/mappings to be removed/we will keep them warm and safe + 'log_jsonpatch' => If for Update operations we will generate a single PER ADO Log with a full JSON Patch, + 'attempt' => The number of attempts to process. We always start with a 1 + 'zip_file' => Zip File/File Entity + 'queue_name' => because well ... we use Hydroponics too + 'force_file_queue' => defaults to false, will always treat files as separate queue items. + 'force_file_process' => defaults to false, will force all techmd and file fetching to happen from scratch instead of using cached versions. 
+ 'manyfiles' => Number of files (passed by \Drupal\ami\Form\amiSetEntityProcessForm::submitForm) that will trigger queue processing for files, + 'ops_skip_onmissing_file' => Skips ADO operations if a passed/mapped file is not present, + 'ops_forcemanaged_destination_file' => Forces Archipelago to manage a files destination when the source matches the destination Schema (e.g S3), + 'time_submitted' => Timestamp on when the queue was send. All Entries will share the same + ]; + + Most of this data will simply be relayed to another queue item. + // This will simply go to an alternate processing on this same Queue Worker + // Just for files. + */ + $adodata = clone $data; + $adodata->info = NULL; + $added = []; + // @TODO discuss with Allison the idea that one could ingest with "AMI set" data but without an actual AMI set? + // That would require, e.g generating a fake $data->info['set_id'] + $csv_file = $data->info['csv_file'] ?? NULL; + if ($csv_file instanceof FileInterface) { + $invalid = []; + + // Note. We won't process the nested CSV here. This queue worker only takes a CSV and splits into smaller + // chunks. Basically what the \Drupal\ami\Form\amiSetEntityProcessForm::submitForm already does. + // But the ADO worker itself will (new code) extract a CSV and then again, enqueue back to this so this one can yet again + // split into smaller chuncks and so on. + $info = $this->AmiUtilityService->preprocessAmiSet($data->info['csv_file'], $data, $invalid, FALSE); + + if (!count($info)) { + //@TODO tell the user which CSV failed please? + $message = $this->t('So sorry. CSV @csv for @setid produced no ADOs. Please correct your source CSV data', [ + '@setid' => $data->info['set_id'], + '@csv' => $csv_file->getFilename(), + ]); + $this->loggerFactory->get('ami_file')->warning($message, [ + 'setid' => $data->info['set_id'] ?? NULL, + 'time_submitted' => $data->info['time_submitted'] ?? 
'', + ]); + return; + } + + foreach ($info as $item) { + // We set current User here since we want to be sure the final owner of + // the object is this and not the user that runs the queue + $adodata->info = [ + 'zip_file' => $data->info['zip_file'] ?? NULL, + 'row' => $item, + 'set_id' => $data->info['set_id'], + 'uid' => $data->info['uid'], + 'status' => $data->info['status'], + 'op_secondary' => $data->info['op_secondary'] ?? NULL, + 'ops_safefiles' => $data->info['ops_safefiles'] ? TRUE : FALSE, + 'log_jsonpatch' => FALSE, + 'set_url' => $data->info['set_url'], + 'attempt' => 1, + 'queue_name' => $data->info['queue_name'], + 'force_file_queue' => $data->info['force_file_queue'], + 'force_file_process' => $data->info['force_file_process'], + 'manyfiles' => $data->info['manyfiles'], + 'ops_skip_onmissing_file' => $data->info['ops_skip_onmissing_file'], + 'ops_forcemanaged_destination_file' => $data->info['ops_forcemanaged_destination_file'], + 'time_submitted' => $data->info['time_submitted'], + ]; + $added[] = \Drupal::queue($data->info['queue_name']) + ->createItem($adodata); + } + if (count($added)) { + $message = $this->t('CSV @csv for Set @setid was expanded to @count ADOs', [ + '@setid' => $data->info['set_id'], + '@csv' => $csv_file->getFilename(), + '@count' => count($added), + ]); + $this->loggerFactory->get('ami_file')->info($message, [ + 'setid' => $data->info['set_id'] ?? NULL, + 'time_submitted' => $data->info['time_submitted'] ?? '', + ]); + } + if (count($invalid)) { + $invalid_message = $this->formatPlural(count($invalid), + 'Source data Row @row had an issue, common cause is an invalid parent.', + '@count rows, @row, had issues, common causes are invalid parents and/or non existing referenced rows.', + [ + '@row' => implode(', ', array_keys($invalid)), + ] + ); + $this->loggerFactory->get('ami_file')->warning($invalid_message, [ + 'setid' => $data->info['set_id'] ?? NULL, + 'time_submitted' => $data->info['time_submitted'] ?? 
'', + ]); + } + $processed_set_status = $this->statusStore->get('set_' . $data->info['set_id']); + $processed_set_status['processed'] = $processed_set_status['processed'] ?? 0; + $processed_set_status['errored'] = $processed_set_status['errored'] ?? 0; + $processed_set_status['total'] = ($processed_set_status['total'] ?? 0) + count($added); + $this->statusStore->set('set_' . $data->info['set_id'], $processed_set_status); + return; + } + else { + error_log('wrongly enqueued'); + } + // @TODO add a logger error saying it was enqueued as CSV but there was no CSV file to be found + return; + } +} diff --git a/src/Plugin/QueueWorker/IngestADOQueueWorker.php b/src/Plugin/QueueWorker/IngestADOQueueWorker.php index ef71bd1..c7c6d92 100644 --- a/src/Plugin/QueueWorker/IngestADOQueueWorker.php +++ b/src/Plugin/QueueWorker/IngestADOQueueWorker.php @@ -5,6 +5,7 @@ use Drupal\ami\AmiLoDService; use Drupal\ami\AmiUtilityService; use Drupal\ami\Entity\amiSetEntity; +use Drupal\Core\Datetime\DrupalDateTime; use Drupal\file\FileInterface; use Drupal\Core\Entity\EntityTypeManagerInterface; use Drupal\Core\Logger\LoggerChannelFactoryInterface; @@ -12,6 +13,8 @@ use Drupal\Core\Plugin\ContainerFactoryPluginInterface; use Drupal\Core\Queue\QueueWorkerBase; use Drupal\Core\StringTranslation\StringTranslationTrait; +use Drupal\strawberryfield\Event\StrawberryfieldFileEvent; +use Drupal\strawberryfield\StrawberryfieldEventType; use Drupal\strawberryfield\StrawberryfieldFilePersisterService; use Drupal\strawberryfield\StrawberryfieldUtilityService; use Monolog\Handler\StreamHandler; @@ -21,6 +24,7 @@ use Swaggest\JsonDiff\JsonDiff; use Drupal\strawberryfield\Tools\StrawberryfieldJsonHelper; use \Drupal\Core\TempStore\PrivateTempStoreFactory; +use Symfony\Component\EventDispatcher\EventDispatcherInterface; /** * Processes and Ingests each AMI Set CSV row. 
@@ -103,31 +107,39 @@ class IngestADOQueueWorker extends QueueWorkerBase implements ContainerFactoryPl * * @var array */ - private CONST OP_HUMAN = [ + protected CONST OP_HUMAN = [ 'create' => 'created', 'update' => 'updated', 'patch' => 'patched', ]; + /** + * The event dispatcher. + * + * @var EventDispatcherInterface + */ + protected EventDispatcherInterface $eventDispatcher; + /** * Constructor. * * @param array $configuration * @param string $plugin_id * @param mixed $plugin_definition - * @param \Drupal\Core\Entity\EntityTypeManagerInterface $entity_type_manager - * @param \Drupal\Core\Logger\LoggerChannelFactoryInterface $logger_factory - * @param \Drupal\strawberryfield\StrawberryfieldUtilityService $strawberryfield_utility_service - * @param \Drupal\ami\AmiUtilityService $ami_utility - * @param \Drupal\ami\AmiLoDService $ami_lod - * @param \Drupal\Core\Messenger\MessengerInterface $messenger - * @param \Drupal\strawberryfield\StrawberryfieldFilePersisterService $strawberry_filepersister - * @param \Drupal\Core\TempStore\PrivateTempStoreFactory $temp_store_factory + * @param EntityTypeManagerInterface $entity_type_manager + * @param LoggerChannelFactoryInterface $logger_factory + * @param StrawberryfieldUtilityService $strawberryfield_utility_service + * @param AmiUtilityService $ami_utility + * @param AmiLoDService $ami_lod + * @param MessengerInterface $messenger + * @param StrawberryfieldFilePersisterService $strawberry_filepersister + * @param PrivateTempStoreFactory $temp_store_factory + * @param EventDispatcherInterface $event_dispatcher */ public function __construct( array $configuration, - $plugin_id, - $plugin_definition, + $plugin_id, + $plugin_definition, EntityTypeManagerInterface $entity_type_manager, LoggerChannelFactoryInterface $logger_factory, StrawberryfieldUtilityService $strawberryfield_utility_service, @@ -135,7 +147,8 @@ public function __construct( AmiLoDService $ami_lod, MessengerInterface $messenger, 
StrawberryfieldFilePersisterService $strawberry_filepersister, - PrivateTempStoreFactory $temp_store_factory + PrivateTempStoreFactory $temp_store_factory, + EventDispatcherInterface $event_dispatcher ) { parent::__construct($configuration, $plugin_id, $plugin_definition); $this->entityTypeManager = $entity_type_manager; @@ -147,6 +160,7 @@ public function __construct( $this->strawberryfilepersister = $strawberry_filepersister; $this->store = $temp_store_factory->get('ami_queue_worker_file'); $this->statusStore = $temp_store_factory->get('ami_queue_status'); + $this->eventDispatcher = $event_dispatcher; } /** @@ -162,8 +176,8 @@ public function __construct( public static function create( ContainerInterface $container, array $configuration, - $plugin_id, - $plugin_definition + $plugin_id, + $plugin_definition ) { return new static( empty($configuration) ? [] : $configuration, @@ -176,7 +190,8 @@ public static function create( $container->get('ami.lod'), $container->get('messenger'), $container->get('strawberryfield.file_persister'), - $container->get('tempstore.private') + $container->get('tempstore.private'), + $container->get('event_dispatcher') ); } @@ -395,16 +410,29 @@ public function processItem($data) { if ($data->mapping->globalmapping == "custom") { $file_object = $data->mapping->custommapping_settings->{$data->info['row']['type']}->files ?? NULL; + $csv_file_object = $data->mapping->custommapping_settings->{$data->info['row']['type']}->files_csv ?? NULL; } else { $file_object = $data->mapping->globalmapping_settings->files ?? NULL; + $csv_file_object = $data->mapping->globalmapping_settings->files_csv ?? NULL; } $file_columns = []; + $file_csv_columns = []; + + $ado_columns = []; if ($file_object && is_object($file_object)) { $file_columns = array_values(get_object_vars($file_object)); } + // CSV (nested ones) can not be processed as "pre-files", but still need to be processed as files. 
+ // There might be an edge case where the user decides that the CSV that generated the children + // should also be attached to the parent ADO. Still, we need to be sure the ADO itself was ingested + // before treating the CSV as a source for children objects. + + if ($csv_file_object && is_object($csv_file_object)) { + $file_csv_columns = array_values(get_object_vars($csv_file_object)); + } if ($ado_object && is_object($ado_object)) { $ado_columns = array_values(get_object_vars($ado_object)); @@ -543,7 +571,7 @@ public function processItem($data) { } if ($process_files_via_queue && empty($data->info['waiting_for_files'])) { - // If so we need to push this one to the end.. + // If so we need to push this one to the end. // Reset the attempts $data->info['waiting_for_files'] = TRUE; $data->info['attempt'] = $data->info['attempt'] ? $data->info['attempt'] + 1 : 0; @@ -565,7 +593,38 @@ public function processItem($data) { return; } // Only persist if we passed this. - $this->persistEntity($data, $processed_metadata); + // True if all ok, to the best of our knowledge of course + $persisted = $this->persistEntity($data, $processed_metadata); + // We only process a CSV column IF and only if the row that contains it generated an ADO. + if ($persisted && !empty($file_csv_columns)) { + $current_uuid = $data->info['row']['uuid'] ?? NULL; + $current_row_id = $data->info['row']['row_id'] ?? NULL; + $data_csv = clone $data; + unset($data_csv->info['row']); + foreach ($file_csv_columns as $file_csv_column) { + if (isset($data->info['row']['data'][$file_csv_column]) && strlen(trim($data->info['row']['data'][$file_csv_column])) >= 5) { + $filenames = trim($data->info['row']['data'][$file_csv_column]); + $filenames = array_map(function($value) { + $value = $value ?? ''; + return trim($value); + }, explode(';', $filenames)); + $filenames = array_filter($filenames); + // We will keep the original row ID, so we can log it. 
+ $data_csv->info['row']['row_id'] = $current_row_id; + foreach($filenames as $filename) { + $data_csv->info['csv_filename'] = $filename; + $csv_file = $this->processCSvFile($data_csv); + if ($csv_file) { + $data_csv->info['csv_file'] = $csv_file; + // Push to the CSV queue + \Drupal::queue('ami_csv_ado') + ->createItem($data_csv); + } + } + } + } + } + return; } @@ -601,7 +660,7 @@ private function isRemote($uri) { private function persistEntity(\stdClass $data, array $processed_metadata) { if (!$this->canProcess($data)) { - return; + return FALSE; } //OP can be one of: @@ -636,7 +695,7 @@ private function persistEntity(\stdClass $data, array $processed_metadata) { 'time_submitted' => $data->info['time_submitted'] ?? '', ]); $this->setStatus(amiSetEntity::STATUS_PROCESSING_WITH_ERRORS, $data); - return; + return FALSE; } $bundle = $property_path_split[0]; @@ -650,12 +709,15 @@ private function persistEntity(\stdClass $data, array $processed_metadata) { $status = $data->info['status'][$bundle] ?? 0; // default Sortfile which will respect the ingest order. If there was already one set, preserve. $sort_files = isset($processed_metadata['ap:tasks']) && isset($processed_metadata['ap:tasks']['ap:sortfiles']) ? $processed_metadata['ap:tasks']['ap:sortfiles'] : 'index'; - if (isset($processed_metadata['ap:tasks']) && is_array($processed_metadata['ap:tasks'])) { - $processed_metadata['ap:tasks']['ap:sortfiles'] = $sort_files; - } - else { - $processed_metadata['ap:tasks'] = []; - $processed_metadata['ap:tasks']['ap:sortfiles'] = $sort_files; + // We can't blindly override ap:tasks if we are dealing with an update operation. 
So make an exception here for only create + // And deal with the same for update but later + if ($op ==='create') { + if (isset($processed_metadata['ap:tasks']) && is_array($processed_metadata['ap:tasks'])) { + $processed_metadata['ap:tasks']['ap:sortfiles'] = $sort_files; + } else { + $processed_metadata['ap:tasks'] = []; + $processed_metadata['ap:tasks']['ap:sortfiles'] = $sort_files; + } } // JSON_ENCODE AGAIN! @@ -768,7 +830,6 @@ private function persistEntity(\stdClass $data, array $processed_metadata) { $processed_metadata = $original_value; } - if (isset($data->info['log_jsonpatch']) && $data->info['log_jsonpatch']) { $this->patchJson($original_value ?? [], $processed_metadata ?? [], true); } @@ -832,8 +893,18 @@ function str_starts_with($haystack, $needle) { } $processed_metadata = $original_value; } - // @TODO. Log this? - // $this->patchJson($original_value, $processed_metadata); + + // Now deal again with ap:tasks only if the replace/append operation stripped the basics out + + if (isset($processed_metadata['ap:tasks']) && is_array($processed_metadata['ap:tasks'])) { + // basically reuse what is there (which at this stage will be a mix of original data and new or default to the $sort file defined before + $processed_metadata['ap:tasks']['ap:sortfiles'] = $processed_metadata['ap:tasks']['ap:sortfiles'] ?? $sort_files; + } else { + // Only set if after all the options it was removed at all. + $processed_metadata['ap:tasks'] = []; + $processed_metadata['ap:tasks']['ap:sortfiles'] = $sort_files; + } + $itemfield->setMainValueFromArray($processed_metadata); break; @@ -877,6 +948,7 @@ function str_starts_with($haystack, $needle) { 'time_submitted' => $data->info['time_submitted'] ?? '', ]); $this->setStatus(amiSetEntity::STATUS_PROCESSING, $data); + return TRUE; } catch (\Exception $exception) { $message = $this->t('Sorry we did all right but failed @ophuman the ADO with UUID @uuid on Set @setid. Something went wrong. 
Please check your Drupal Logs and notify your admin.',[ @@ -889,7 +961,7 @@ function str_starts_with($haystack, $needle) { 'time_submitted' => $data->info['time_submitted'] ?? '', ]); $this->setStatus(amiSetEntity::STATUS_PROCESSING_WITH_ERRORS, $data); - return; + return FALSE; } } else { @@ -903,6 +975,7 @@ function str_starts_with($haystack, $needle) { 'time_submitted' => $data->info['time_submitted'] ?? '', ]); $this->setStatus(amiSetEntity::STATUS_PROCESSING_WITH_ERRORS, $data); + return FALSE; } } @@ -1023,6 +1096,44 @@ protected function processFile($data) { } } + /** + * Processes a CSV File without technical metadata. This is just for the purpose of input for the CSV queue worker + * + * @param mixed $data + */ + protected function processCSvFile($data): \Drupal\Core\Entity\EntityInterface|\Drupal\file\Entity\File|null + { + if (!($data->info['csv_filename'] ?? NULL)) { + return NULL; + } + $file = $this->AmiUtilityService->file_get(trim($data->info['csv_filename']), + $data->info['zip_file'] ?? NULL, TRUE); + if ($file) { + $event_type = StrawberryfieldEventType::TEMP_FILE_CREATION; + $current_timestamp = (new DrupalDateTime())->getTimestamp(); + $event = new StrawberryfieldFileEvent($event_type, 'ami', $file->getFileUri(), $current_timestamp); + // This will allow the extracted CSV from the zip to be composted, even if it was not a CSV. + // IN a queue by \Drupal\strawberryfield\EventSubscriber\StrawberryfieldEventCompostBinSubscriber + $this->eventDispatcher->dispatch($event, $event_type); + } + if ($file && $file->getMimeType() == 'text/csv') { + return $file; + } + else { + $message = $this->t('The referenced nested CSV @filename on row id @rowid from Set @setid could not be found or had the wrong format. Skipping', + [ + '@setid' => $data->info['set_id'], + '@filename' => $data->info['csv_filename'], + '@rowid' => $data->info['row']['row_id'] ?? 
'0', + ]); + $this->loggerFactory->get('ami_file')->warning($message ,[ + 'setid' => $data->info['set_id'] ?? NULL, + 'time_submitted' => $data->info['time_submitted'] ?? '', + ]); + return NULL; + } + } + /** * Checks if processing can be done so we can bail out sooner. @@ -1101,7 +1212,7 @@ private function canProcess($data): bool { * @param string $status * @param \stdClass $data */ - private function setStatus(string $status, \stdClass $data) { + protected function setStatus(string $status, \stdClass $data) { try { $set_id = $data->info['set_id']; if (!empty($set_id)) {