generate_key_value_pairs fix

rdziewietin · rdziewietin · commit a27556e2326e · 2024-08-06T15:44:02.000-04:00
diff --git a/app_doctr.py b/app_doctr.py
@@ -447,7 +447,7 @@ def get_period():
                 print(final_dfs)
                 key_value_pairs = []
                 for df in final_dfs:
-                    key_value_pairs.extend(post_processing.generate_key_value_pairs(df))
+                    key_value_pairs.extend(dhis2.generate_key_value_pairs(df))
                 st.write("Completed")
                 
                 st.session_state.data_payload = json_export(key_value_pairs)
diff --git a/app_llm.py b/app_llm.py
@@ -503,7 +503,7 @@ def authenticate():
 
                         key_value_pairs = []
                         for df in final_dfs:
-                            key_value_pairs.extend(post_processing.generate_key_value_pairs(df, form))
+                            key_value_pairs.extend(dhis2.generate_key_value_pairs(df, form))
                         
                         st.session_state.data_payload = json_export(key_value_pairs)
 
diff --git a/src/msfocr/data/post_processing.py b/src/msfocr/data/post_processing.py
@@ -41,54 +41,6 @@ def get_yyyy_mm_dd(text):
     return None  # Return None if text is not a valid date in any format
 
 
-def generate_key_value_pairs(table, form):
-    """
-    Generates key-value pairs in the format required to upload data to DHIS2.
-    {'dataElement': data_element_id,
-     'categoryOptionCombo': category_id,
-     'value': cell_value}
-     UIDs like data_element_id, category_id are obtained by querying the DHIS2 metadata.
-    :param table: DataFrame generated from table detection
-    :return: List of key value pairs as shown above.
-    """ 
-    data_element_pairs = []
-
-    # Iterate over each cell in the DataFrame
-    table_array = table.values
-    columns = table.columns
-    for row_index in range(table_array.shape[0]):
-        # Row name in tally sheet
-        data_element = table_array[row_index][0]
-        for col_index in range(1, table_array.shape[1]):
-            # Column name in tally sheet
-            category = columns[col_index]
-            cell_value = table_array[row_index][col_index]
-            if cell_value is not None and cell_value!="-" and cell_value!="":
-                data_element_id = None
-                category_id = None
-                # Search for the string in the "label" field of form information
-                string_search = data_element + " " + category
-                for group in form['groups']:
-                    for field in group['fields']:
-                        if field['label']==string_search:
-                            data_element_id = field['dataElement']
-                            category_id = field['categoryOptionCombo']
-                
-                # The following exceptions will be raised if the row or column name in the tally sheet is different from the names used in metadata
-                # For eg. Pop1: Resident is called Population 1 in metadata
-                # If this exception is raised the only way forward is for the user to manually change the row/column name to the one used in metadata
-                if data_element_id is None or category_id is None:
-                    raise Exception(f"Unable to find {string_search} in DHIS2 metadata")
-                # Append to the list of data elements to be push to DHIS2
-                data_element_pairs.append(
-                    {"dataElement": data_element_id,
-                    "categoryOptionCombo": category_id,
-                    "value": cell_value}
-                    )
-
-    return data_element_pairs
-
-            
 def evaluate_cells(table_dfs):
     """Uses simple_eval to perform math operations on each cell, defaulting to input if failed.
 
diff --git a/tests/test_data_dhis2.py b/tests/test_data_dhis2.py
@@ -1,4 +1,7 @@
-from msfocr.data.dhis2 import getAllUIDs
+import pandas as pd
+
+from msfocr.data.dhis2 import getAllUIDs, generate_key_value_pairs
+from msfocr.data import post_processing
 
 def test_getAllUIDs(test_server_config, requests_mock):
     requests_mock.get("http://test.com/api/categoryOptions?filter=name:ilike:12-59m", json={'categoryOptions': [{'id': 'tWRttYIzvBn', 'displayName': '12-59m'}]})
@@ -7,3 +10,50 @@ def test_getAllUIDs(test_server_config, requests_mock):
 
     assert expected_result == result
 
+
+def test_generate_key_value_pairs(test_server_config, requests_mock):
+    """
+    Tests if the dataElement value in the key-value pairs is correct by providing sample tabular data.
+    """
+    df = pd.DataFrame({
+        '0': ['Paed (0-59m) vacc target population'],
+        '0-11m': [None],
+        '12-59m': [None],
+        '5-14y': [None]
+    })
+
+    assert len(generate_key_value_pairs(df, {'groups': [{'fields':[{"label": "Paed (0-59m) vacc target population 0-11m",
+                    "dataElement": "paedid",
+                    "categoryOptionCombo": "0to11mid",
+                    "type": "INTEGER_POSITIVE"}]}]})) == 0
+
+    df = pd.DataFrame({
+        '0': ['BCG', 'Polio (OPV) 0 (birth dose)', 'Polio (OPV) 1 (from 6 wks)'],
+        '0-11m': ['45+29', None, '30+18'],
+        '12-59m': [None, None, '55+29'],
+        '5-14y': [None, None, None]
+    })
+    
+    answer = [{'dataElement': 'bcgid', 'categoryOptions': '0to11mid', 'value': '45+29'},
+              {'dataElement': 'polioid', 'categoryOptions': '0to11mid', 'value': '30+18'},
+              {'dataElement': 'polioid', 'categoryOptions': '5to14yid', 'value': '55+29'}]
+
+    data_element_pairs = generate_key_value_pairs(df, 
+                    {'groups': [{'fields':[{"label": "BCG 0-11m",
+                    "dataElement": "bcgid",
+                    "categoryOptionCombo": "0to11mid",
+                    "type": "INTEGER_POSITIVE"}]},
+                    {'fields':[{"label": "Polio (OPV) 1 (from 6 wks) 0-11m",
+                    "dataElement": "polioid",
+                    "categoryOptionCombo": "0to11mid",
+                    "type": "INTEGER_POSITIVE"}]},
+                    {'fields':[{"label": "Polio (OPV) 1 (from 6 wks) 12-59m",
+                    "dataElement": "polioid",
+                    "categoryOptionCombo": "5to14yid",
+                    "type": "INTEGER_POSITIVE"}]}]})
+    
+    assert len(data_element_pairs) == len(answer)
+
+    for i in range(len(data_element_pairs)):
+        assert data_element_pairs[i]['value'] == answer[i]['value']
+        
diff --git a/tests/test_data_post_processing.py b/tests/test_data_post_processing.py
@@ -3,52 +3,6 @@
 
 from msfocr.data import post_processing
 
-def test_generate_key_value_pairs(test_server_config, requests_mock):
-    """
-    Tests if the dataElement value in the key-value pairs is correct by providing sample tablular data.
-    """
-    df = pd.DataFrame({
-        '0': ['Paed (0-59m) vacc target population'],
-        '0-11m': [None],
-        '12-59m': [None],
-        '5-14y': [None]
-    })
-
-    assert len(post_processing.generate_key_value_pairs(df, {'groups': [{'fields':[{"label": "Paed (0-59m) vacc target population 0-11m",
-                    "dataElement": "paedid",
-                    "categoryOptionCombo": "0to11mid",
-                    "type": "INTEGER_POSITIVE"}]}]})) == 0
-
-    df = pd.DataFrame({
-        '0': ['BCG', 'Polio (OPV) 0 (birth dose)', 'Polio (OPV) 1 (from 6 wks)'],
-        '0-11m': ['45+29', None, '30+18'],
-        '12-59m': [None, None, '55+29'],
-        '5-14y': [None, None, None]
-    })
-    
-    answer = [{'dataElement': 'bcgid', 'categoryOptions': '0to11mid', 'value': '45+29'},
-              {'dataElement': 'polioid', 'categoryOptions': '0to11mid', 'value': '30+18'},
-              {'dataElement': 'polioid', 'categoryOptions': '5to14yid', 'value': '55+29'}]
-
-    data_element_pairs = post_processing.generate_key_value_pairs(df, 
-                    {'groups': [{'fields':[{"label": "BCG 0-11m",
-                    "dataElement": "bcgid",
-                    "categoryOptionCombo": "0to11mid",
-                    "type": "INTEGER_POSITIVE"}]},
-                    {'fields':[{"label": "Polio (OPV) 1 (from 6 wks) 0-11m",
-                    "dataElement": "polioid",
-                    "categoryOptionCombo": "0to11mid",
-                    "type": "INTEGER_POSITIVE"}]},
-                    {'fields':[{"label": "Polio (OPV) 1 (from 6 wks) 12-59m",
-                    "dataElement": "polioid",
-                    "categoryOptionCombo": "5to14yid",
-                    "type": "INTEGER_POSITIVE"}]}]})
-    
-    assert len(data_element_pairs) == len(answer)
-
-    for i in range(len(data_element_pairs)):
-        assert data_element_pairs[i]['value'] == answer[i]['value']
-        
         
 def test_evaluate_cells():
     """