Add a "Dataset Importer" Windmill app #137


Open

wants to merge 51 commits into base: main

Commits (51)
b588d70
Add stepper and db table check
rudokemper Jul 31, 2025
04a9ebd
Better state management for step 1
rudokemper Jul 31, 2025
a102766
Reactive design for step 2
rudokemper Jul 31, 2025
deda729
Save either base64 encoded or string to temp
rudokemper Jul 31, 2025
5e17c00
Correct behavior for saving converted file to temp and setting state
rudokemper Jul 31, 2025
f241854
Exclude XLS and other similar files from unzipping
rudokemper Aug 1, 2025
cb90fcb
Ensure actual XLS fixture is converted well
rudokemper Aug 1, 2025
4de6f34
Yes, we DO need to import openpyxl
rudokemper Aug 1, 2025
a063ec0
Only allow xls and xlsx for now
rudokemper Aug 1, 2025
9c3c1dd
Finishing touches to step 2 scripts
rudokemper Aug 1, 2025
546beff
Add step 4 for finalization (all but script)
rudokemper Aug 1, 2025
6646445
Rename to dataset importer
rudokemper Aug 4, 2025
06ca627
Better state management for tracked vars
rudokemper Aug 4, 2025
cd2508e
Better script names
rudokemper Aug 4, 2025
f8feaea
Remove brackets from script names to prevent errors
rudokemper Aug 4, 2025
59235ee
Add force SQL name function
rudokemper Aug 4, 2025
1d344cb
Pass dataset and valid SQL version; document TODOs
rudokemper Aug 4, 2025
ad3b987
Return file format when converting
rudokemper Aug 4, 2025
ad8ceac
Adjust data conversion tests
rudokemper Aug 4, 2025
c1828e1
Add test for force valid sql name
rudokemper Aug 4, 2025
9bf3a2d
Bump pyodk and dep versions
rudokemper Aug 4, 2025
8e38076
App can work with ODK transformation
rudokemper Aug 4, 2025
3625f49
Finalize flow - working with transformations
rudokemper Aug 4, 2025
cb81896
Refactor scripts and add docstrings
rudokemper Aug 4, 2025
cd19657
Resolve database name
rudokemper Aug 4, 2025
188895c
Adapt step naming convention for file eval upload
rudokemper Aug 4, 2025
8b8cdce
Remove GC File Uploader (Locus Map)
rudokemper Aug 4, 2025
97fb451
Clarify TODOs in comments
rudokemper Aug 4, 2025
2989959
Add data source column even if there are no transformations
rudokemper Aug 5, 2025
769023d
Add CSV to Postgres script
rudokemper Aug 5, 2025
1e42260
Additional refactor: move testable code to common_logic
rudokemper Aug 5, 2025
cbf262a
Small test changes
rudokemper Aug 5, 2025
0e4bb6e
Add feature id in kml and gpx conversion
rudokemper Aug 5, 2025
3cfaa01
Add README
rudokemper Aug 5, 2025
98937b1
Add note to root readme
rudokemper Aug 5, 2025
7420411
UI improvements
rudokemper Aug 5, 2025
124eab3
Add additional TODOs for completion
rudokemper Aug 5, 2025
d670148
Add additional TODOs
rudokemper Aug 6, 2025
6443407
Get CoMapeo working (sort of)
rudokemper Aug 6, 2025
fa3765a
Improved error returning in result messages
rudokemper Aug 7, 2025
12fcd4e
Merge branch 'main' of github.com:conservationmetrics/gc-scripts-hub …
rudokemper Aug 12, 2025
2130c4e
Bugfix: error in upload process shows correctly
rudokemper Aug 12, 2025
e5d039c
Sniff geojson files in .json extension
rudokemper Aug 12, 2025
1f0f91d
Deterministic uuid creation for missing geojson ids
rudokemper Aug 12, 2025
fd27248
Merge branch 'main' of github.com:conservationmetrics/gc-scripts-hub …
rudokemper Aug 14, 2025
9bbfbfd
Merge branch 'main' of github.com:conservationmetrics/gc-scripts-hub …
rudokemper Aug 14, 2025
fdcbe33
Merge branch 'main' of github.com:conservationmetrics/gc-scripts-hub …
rudokemper Aug 20, 2025
ef5e190
Improved UI behavior
rudokemper Aug 20, 2025
80150f8
Merge remote-tracking branch 'origin/main' into windmill-app-gc-file-…
rudokemper Aug 20, 2025
0a70989
Merge remote-tracking branch 'origin/main' into windmill-app-gc-file-…
rudokemper Aug 20, 2025
8891327
Add TODO about more robust dataset name case handling
rudokemper Aug 20, 2025
README.md (1 addition, 0 deletions)

@@ -19,6 +19,7 @@ Some of the tools available in the Guardian Connector Scripts Hub are:
* A flow to download and store GeoJSON and GeoTIFF change detection alerts, post these to a CoMapeo Archive Server API, and send a message to WhatsApp recipients via Twilio.
* Scripts to export data from a database into a specific format (e.g., GeoJSON).
* An app to import and transform datasets from a variety of file formats and sources into a PostgreSQL database.

![Available scripts, flows, and apps in gc-scripts-hub](gc-scripts-hub.jpg)
_A Windmill Workspace populated with some of the tools in this repository._
@@ -0,0 +1,16 @@
const { currentStepIndex, lastAction } = formStepper;

// Step 1: Dataset name must be valid
if (currentStepIndex === 0 && !state.datasetAvailable) {
throw new Error("Please enter a valid dataset name to proceed.");
}

// Step 2: File must be uploaded
if (currentStepIndex === 1 && lastAction === "next" && !state.uploadSuccess) {
throw new Error("Please upload your file to proceed.");
}

// Step 4: Can't reuse same session
if (currentStepIndex === 3 && state.finalizeSuccess) {
throw new Error("Please refresh the page to upload another file.");
}
@@ -0,0 +1,2 @@
# py313
psycopg2-binary==2.9.10
@@ -0,0 +1,14 @@
from f.common_logic.db_operations import check_if_table_exists, conninfo, postgresql
from f.common_logic.identifier_utils import normalize_identifier


def main(db: postgresql, dataset_name: str):
valid_sql_name = normalize_identifier(dataset_name)

table_exists = check_if_table_exists(conninfo(db), valid_sql_name)

return {
"tableExists": table_exists,
"datasetName": dataset_name,
"validSqlName": valid_sql_name,
}
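The `normalize_identifier` helper used above lives in `f.common_logic.identifier_utils` and isn't shown in this diff. A minimal standalone sketch of what such a normalizer might do (a hypothetical stand-in, not the actual helper):

```python
import re

def normalize_identifier_sketch(name: str) -> str:
    # Hypothetical stand-in for normalize_identifier: lowercase, collapse runs of
    # non-alphanumeric characters to underscores, and guard against a leading
    # digit (PostgreSQL identifiers may not start with one).
    ident = re.sub(r"[^a-z0-9_]+", "_", name.strip().lower()).strip("_")
    if ident and ident[0].isdigit():
        ident = f"_{ident}"
    return ident or "_unnamed"
```

With a normalizer along these lines, a user-facing name like `"My Dataset (2025)"` would map to the table name `my_dataset_2025`, which is what the availability check then looks up.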
@@ -0,0 +1,8 @@
switch (state.datasetAvailable) {
case true:
return `✅ Dataset name is available! The database table name will be "${state.validSqlName}".`;
case false:
return "⚠️ Dataset name is already in use.";
default:
return "";
}
@@ -0,0 +1 @@
return state.fileNameOriginal
@@ -1 +1,2 @@
state.uploadSuccess = false;
state.uploadButtonEnabled = Boolean(selectFile?.result);
@@ -0,0 +1,3 @@
// if (selectFile.result && uploadFile.result && !uploadFile.result.error) {
// state.uploadSuccess = true;
// }
@@ -0,0 +1,7 @@
if (state.uploadButtonEnabled && state.uploadSuccess) {
return "✅ File successfully uploaded to temporary storage! Please proceed to the next step to finish writing the data to the warehouse."
} else if (state.uploadButtonEnabled && !state.uploadSuccess && state.uploadErrorMessage) {
return `❌ ${state.uploadErrorMessage}`
} else {
return ""
}
@@ -0,0 +1,16 @@
# py313
attrs==25.3.0
certifi==2025.8.3
click==8.2.1
click-plugins==1.1.1.2
cligj==0.7.2
et-xmlfile==2.0.0
filetype==1.2.0
fiona==1.10.1
numpy==2.3.2
openpyxl==3.1.5
pandas==2.3.1
python-dateutil==2.9.0.post0
pytz==2025.2
six==1.17.0
tzdata==2025.2
@@ -0,0 +1,85 @@
import csv
import json
import logging
from io import StringIO
from pathlib import Path

from f.common_logic.data_conversion import convert_data, detect_structured_data_type
from f.common_logic.file_operations import save_uploaded_file_to_temp

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def main(uploaded_file, dataset_name):
"""
Process uploaded file and convert to standardized format.

Takes an uploaded file, detects its format, and converts it to either CSV or GeoJSON
depending on the data type. Saves both original and converted files to a dataset-specific
temporary directory for further processing.

Parameters
----------
uploaded_file : object or list
File object or list containing uploaded file data.
dataset_name : str
Name of the dataset, used for creating temp directory paths.

Returns
-------
tuple[bool, str | None, str | None, str | None]
A tuple containing (success, error_message, output_filename, output_format):
- success : bool
True if processing completed successfully, False if an error occurred.
- error_message : str or None
Error message if success is False, None if success is True.
- output_filename : str or None
Name of the converted file with '_parsed' suffix if successful, None if failed.
- output_format : str or None
Format of converted file ('csv' or 'geojson') if successful, None if failed.
"""
try:
logger.info(f"Starting file upload and conversion for dataset: {dataset_name}")

temp_dir = Path(f"/persistent-storage/tmp/{dataset_name}")
temp_dir.mkdir(parents=True, exist_ok=True)
logger.info(f"Created dataset temp directory: {temp_dir}")

saved_input = save_uploaded_file_to_temp(uploaded_file, tmp_dir=str(temp_dir))
input_path = saved_input["file_paths"][0]
logger.info(f"Saved original file to: {input_path}")

file_format = detect_structured_data_type(input_path)
logger.info(f"Detected file format: {file_format}")

converted_data, output_format = convert_data(input_path, file_format)
logger.info(f"Converted to format: {output_format}")

output_filename = f"{Path(input_path).stem}_parsed.{output_format}"

if output_format == "csv":
output = StringIO()
writer = csv.writer(output)
writer.writerows(converted_data)
csv_data = output.getvalue()

file_to_save = [{"name": output_filename, "data": csv_data}]
else: # geojson
file_to_save = [
{"name": output_filename, "data": json.dumps(converted_data)}
]

saved_output = save_uploaded_file_to_temp(
file_to_save, is_base64=False, tmp_dir=str(temp_dir)
)
output_path = saved_output["file_paths"][0]
logger.info(f"Saved parsed file to: {output_path}")

# Return success
return True, None, output_filename, output_format

except Exception as e:
error_msg = f"Error during file upload and conversion: {e}"
logger.error(error_msg)
return False, error_msg, None, None
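The CSV branch of the script above serializes the converted rows by pointing `csv.writer` at an in-memory `StringIO` buffer rather than a file. The pattern in isolation, with illustrative sample rows (header first, as `convert_data` is assumed to produce for tabular input):

```python
import csv
from io import StringIO

# Sample rows standing in for the output of convert_data on tabular input.
rows = [["id", "name"], ["1", "river"], ["2", "forest"]]

buf = StringIO()
csv.writer(buf).writerows(rows)
csv_data = buf.getvalue()  # single string, ready to hand to the temp-file saver
```

Writing to `StringIO` avoids a second temp file for the intermediate representation; the resulting string is passed to `save_uploaded_file_to_temp` with `is_base64=False`, just as in the script.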
@@ -0,0 +1,5 @@
if (dataSourceToggle.result && state.dataSource) {
return state.dataSource
} else {
return "None selected"
}
@@ -0,0 +1,5 @@
if (dataSourceToggle.result) {
state.dataSource = dataSources.result;
} else {
state.dataSource = undefined;
}
@@ -0,0 +1,13 @@
# py313
annotated-types==0.7.0
certifi==2025.8.3
charset-normalizer==3.4.2
idna==3.10
psycopg2-binary==2.9.10
pydantic==2.9.2
pydantic-core==2.23.4
pyodk==1.2.1
requests==2.32.3
toml==0.10.2
typing-extensions==4.14.1
urllib3==2.5.0