Skip to content

Commit a27556e

Browse files
committed
generate_key_value_pairs fix
1 parent 3274c11 commit a27556e

File tree

5 files changed

+53
-97
lines changed

5 files changed

+53
-97
lines changed

app_doctr.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -447,7 +447,7 @@ def get_period():
447447
print(final_dfs)
448448
key_value_pairs = []
449449
for df in final_dfs:
450-
key_value_pairs.extend(post_processing.generate_key_value_pairs(df))
450+
key_value_pairs.extend(dhis2.generate_key_value_pairs(df))
451451
st.write("Completed")
452452

453453
st.session_state.data_payload = json_export(key_value_pairs)

app_llm.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -503,7 +503,7 @@ def authenticate():
503503

504504
key_value_pairs = []
505505
for df in final_dfs:
506-
key_value_pairs.extend(post_processing.generate_key_value_pairs(df, form))
506+
key_value_pairs.extend(dhis2.generate_key_value_pairs(df, form))
507507

508508
st.session_state.data_payload = json_export(key_value_pairs)
509509

src/msfocr/data/post_processing.py

-48
Original file line numberDiff line numberDiff line change
@@ -41,54 +41,6 @@ def get_yyyy_mm_dd(text):
4141
return None # Return None if text is not a valid date in any format
4242

4343

44-
def generate_key_value_pairs(table, form):
45-
"""
46-
Generates key-value pairs in the format required to upload data to DHIS2.
47-
{'dataElement': data_element_id,
48-
'categoryOptionCombo': category_id,
49-
'value': cell_value}
50-
UIDs like data_element_id, category_id are obtained by querying the DHIS2 metadata.
51-
:param table: DataFrame generated from table detection
52-
:return: List of key value pairs as shown above.
53-
"""
54-
data_element_pairs = []
55-
56-
# Iterate over each cell in the DataFrame
57-
table_array = table.values
58-
columns = table.columns
59-
for row_index in range(table_array.shape[0]):
60-
# Row name in tally sheet
61-
data_element = table_array[row_index][0]
62-
for col_index in range(1, table_array.shape[1]):
63-
# Column name in tally sheet
64-
category = columns[col_index]
65-
cell_value = table_array[row_index][col_index]
66-
if cell_value is not None and cell_value!="-" and cell_value!="":
67-
data_element_id = None
68-
category_id = None
69-
# Search for the string in the "label" field of form information
70-
string_search = data_element + " " + category
71-
for group in form['groups']:
72-
for field in group['fields']:
73-
if field['label']==string_search:
74-
data_element_id = field['dataElement']
75-
category_id = field['categoryOptionCombo']
76-
77-
# The following exceptions will be raised if the row or column name in the tally sheet is different from the names used in metadata
78-
# For eg. Pop1: Resident is called Population 1 in metadata
79-
# If this exception is raised the only way forward is for the user to manually change the row/column name to the one used in metadata
80-
if data_element_id is None or category_id is None:
81-
raise Exception(f"Unable to find {string_search} in DHIS2 metadata")
82-
# Append to the list of data elements to be push to DHIS2
83-
data_element_pairs.append(
84-
{"dataElement": data_element_id,
85-
"categoryOptionCombo": category_id,
86-
"value": cell_value}
87-
)
88-
89-
return data_element_pairs
90-
91-
9244
def evaluate_cells(table_dfs):
9345
"""Uses simple_eval to perform math operations on each cell, defaulting to input if failed.
9446

tests/test_data_dhis2.py

+51-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
1-
from msfocr.data.dhis2 import getAllUIDs
1+
import pandas as pd
2+
3+
from msfocr.data.dhis2 import getAllUIDs, generate_key_value_pairs
4+
from msfocr.data import post_processing
25

36
def test_getAllUIDs(test_server_config, requests_mock):
47
requests_mock.get("http://test.com/api/categoryOptions?filter=name:ilike:12-59m", json={'categoryOptions': [{'id': 'tWRttYIzvBn', 'displayName': '12-59m'}]})
@@ -7,3 +10,50 @@ def test_getAllUIDs(test_server_config, requests_mock):
710

811
assert expected_result == result
912

13+
14+
def test_generate_key_value_pairs(test_server_config, requests_mock):
15+
"""
16+
Tests if the dataElement value in the key-value pairs is correct by providing sample tabular data.
17+
"""
18+
df = pd.DataFrame({
19+
'0': ['Paed (0-59m) vacc target population'],
20+
'0-11m': [None],
21+
'12-59m': [None],
22+
'5-14y': [None]
23+
})
24+
25+
assert len(generate_key_value_pairs(df, {'groups': [{'fields':[{"label": "Paed (0-59m) vacc target population 0-11m",
26+
"dataElement": "paedid",
27+
"categoryOptionCombo": "0to11mid",
28+
"type": "INTEGER_POSITIVE"}]}]})) == 0
29+
30+
df = pd.DataFrame({
31+
'0': ['BCG', 'Polio (OPV) 0 (birth dose)', 'Polio (OPV) 1 (from 6 wks)'],
32+
'0-11m': ['45+29', None, '30+18'],
33+
'12-59m': [None, None, '55+29'],
34+
'5-14y': [None, None, None]
35+
})
36+
37+
answer = [{'dataElement': 'bcgid', 'categoryOptions': '0to11mid', 'value': '45+29'},
38+
{'dataElement': 'polioid', 'categoryOptions': '0to11mid', 'value': '30+18'},
39+
{'dataElement': 'polioid', 'categoryOptions': '5to14yid', 'value': '55+29'}]
40+
41+
data_element_pairs = generate_key_value_pairs(df,
42+
{'groups': [{'fields':[{"label": "BCG 0-11m",
43+
"dataElement": "bcgid",
44+
"categoryOptionCombo": "0to11mid",
45+
"type": "INTEGER_POSITIVE"}]},
46+
{'fields':[{"label": "Polio (OPV) 1 (from 6 wks) 0-11m",
47+
"dataElement": "polioid",
48+
"categoryOptionCombo": "0to11mid",
49+
"type": "INTEGER_POSITIVE"}]},
50+
{'fields':[{"label": "Polio (OPV) 1 (from 6 wks) 12-59m",
51+
"dataElement": "polioid",
52+
"categoryOptionCombo": "5to14yid",
53+
"type": "INTEGER_POSITIVE"}]}]})
54+
55+
assert len(data_element_pairs) == len(answer)
56+
57+
for i in range(len(data_element_pairs)):
58+
assert data_element_pairs[i]['value'] == answer[i]['value']
59+

tests/test_data_post_processing.py

-46
Original file line numberDiff line numberDiff line change
@@ -3,52 +3,6 @@
33

44
from msfocr.data import post_processing
55

6-
def test_generate_key_value_pairs(test_server_config, requests_mock):
7-
"""
8-
Tests if the dataElement value in the key-value pairs is correct by providing sample tablular data.
9-
"""
10-
df = pd.DataFrame({
11-
'0': ['Paed (0-59m) vacc target population'],
12-
'0-11m': [None],
13-
'12-59m': [None],
14-
'5-14y': [None]
15-
})
16-
17-
assert len(post_processing.generate_key_value_pairs(df, {'groups': [{'fields':[{"label": "Paed (0-59m) vacc target population 0-11m",
18-
"dataElement": "paedid",
19-
"categoryOptionCombo": "0to11mid",
20-
"type": "INTEGER_POSITIVE"}]}]})) == 0
21-
22-
df = pd.DataFrame({
23-
'0': ['BCG', 'Polio (OPV) 0 (birth dose)', 'Polio (OPV) 1 (from 6 wks)'],
24-
'0-11m': ['45+29', None, '30+18'],
25-
'12-59m': [None, None, '55+29'],
26-
'5-14y': [None, None, None]
27-
})
28-
29-
answer = [{'dataElement': 'bcgid', 'categoryOptions': '0to11mid', 'value': '45+29'},
30-
{'dataElement': 'polioid', 'categoryOptions': '0to11mid', 'value': '30+18'},
31-
{'dataElement': 'polioid', 'categoryOptions': '5to14yid', 'value': '55+29'}]
32-
33-
data_element_pairs = post_processing.generate_key_value_pairs(df,
34-
{'groups': [{'fields':[{"label": "BCG 0-11m",
35-
"dataElement": "bcgid",
36-
"categoryOptionCombo": "0to11mid",
37-
"type": "INTEGER_POSITIVE"}]},
38-
{'fields':[{"label": "Polio (OPV) 1 (from 6 wks) 0-11m",
39-
"dataElement": "polioid",
40-
"categoryOptionCombo": "0to11mid",
41-
"type": "INTEGER_POSITIVE"}]},
42-
{'fields':[{"label": "Polio (OPV) 1 (from 6 wks) 12-59m",
43-
"dataElement": "polioid",
44-
"categoryOptionCombo": "5to14yid",
45-
"type": "INTEGER_POSITIVE"}]}]})
46-
47-
assert len(data_element_pairs) == len(answer)
48-
49-
for i in range(len(data_element_pairs)):
50-
assert data_element_pairs[i]['value'] == answer[i]['value']
51-
526

537
def test_evaluate_cells():
548
"""

0 commit comments

Comments
 (0)