-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdata_text.py
280 lines (226 loc) · 13.3 KB
/
data_text.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
import os
import json
import pandas as pd
import numpy as np
import jsonlines
import re
import matplotlib.pyplot as plt
import csv
import glob
import zhipuai
def generate_gt(vitals, label='HR'):
    """Compute the rule-based ground truth for one window of vital samples.

    Args:
        vitals: Sequence of numeric samples belonging to a single window.
        label: Which vital the samples represent: 'HR', 'SpO2' or 'BVP'.

    Returns:
        The first element is always the health state
        (0: normal, 1: abnormal, 2: extreme abnormal).

        * 'HR'   -> (health_state, mean, max); state 1 when the maximum is
          at least 100, state 2 when it is above 130.
        * 'SpO2' -> (health_state, mean, min); state 1 when the minimum is
          at most 95, state 2 when it is at most 92.
        * 'BVP'  -> (health_state, mean); state 1 when the mean is above 0.5.

        Any other label falls through and yields None.
    """
    if label == 'HR':
        mean_value = np.mean(vitals)
        peak = np.max(vitals)
        # Test the stricter threshold first; both are driven by the peak.
        if peak > 130:
            state = 2
        elif peak >= 100:
            state = 1
        else:
            state = 0
        return state, mean_value, peak

    if label == 'SpO2':
        mean_value = np.mean(vitals)
        trough = np.min(vitals)
        # Desaturation is judged on the lowest sample in the window.
        if trough <= 92:
            state = 2
        elif trough <= 95:
            state = 1
        else:
            state = 0
        return state, mean_value, trough

    if label == 'BVP':
        mean_value = np.mean(vitals)
        state = 1 if mean_value > 0.5 else 0
        return state, mean_value

    # Unknown label: no ground truth is produced.
    return None
# Prompt templates sent to the LLMs. gt_text() stringifies the whole dict with
# str(...) and appends the raw samples, so key order and wording are part of
# the request.
#
# The reply format requested at the end of each 'task' must be identical to
# 'output_format': the model otherwise receives two conflicting formats, and
# extract_and_save_to_csv() only recognises the 'output_format' spelling
# ("Reasons:", and "blood oxygen level" for SpO2).

# Heart-rate-only prompt.
prompt_HR = {
    'introduction': 'Here is a list of Heart Rate data of myself. Each point refers to a second.',
    'task': 'Please tell me the average heart rate. Keep one decimal place. And give me analysis of health. If the heart rate for anytime is under 100, health state is 0. If the heart rate for anytime is 100-130, health state is 1. If the heart rate is above 130, health state is 2. Then tell me why you have such judgment on the reasons part. Please consider the trend of the vital in time series as well. Please output as the format required: The average heart rate : XXX. The health state : XXX. Suggestions: XXX. Reasons: XXX.',
    'background': 'An ideal heart rate is between 50 and 90 beats per minute (bpm). It is acceptable to continue home monitoring for 101-109. If the heart rate is 110 to 130, you would better seek advice from your GP. If the heart rate is 140 or above, you should seek medical advice immediately.',
    'output_format': 'The average heart rate : XXX. The health state : XXX. Suggestions: XXX. Reasons: XXX.',
}

# Blood-oxygen-only prompt.
prompt_SpO2 = {
    'introduction': 'Here is a list of Blood Oxygen(SpO2 value) data of myself. Each point refers to a second.',
    'task': 'Please tell me the average blood oxygen. Keep one decimal place. And give me analysis of health. If SpO2 for anytim is between 96% and 99%, health state is 0. If SpO2 for anytim is between 93% and 95%, health state is 1. If SpO2 for anytim is 92 or less, health state is 2. Then tell me why you have such judgment on the reasons part. Please consider the trend of the vital in time series as well. Please output as the format required: The average blood oxygen level : XXX. The health state : XXX. Suggestions: XXX. Reasons: XXX.',
    'background': 'A normal blood oxygen level varies between between 96% and 99% in healthy individuals.It is acceptable to continue home monitoring for 93%-95%. If the blood oxygen level is 92% or less, it is considered low and called hypoxemia,you would better seek advice from your GP. If the blood oxygen level is below 85%, it is considered critical and called hypoxia, you should seek medical advice immediately.',
    'output_format': 'The average blood oxygen level : XXX. The health state : XXX. Suggestions: XXX. Reasons: XXX.',
}

# Combined heart-rate + blood-oxygen prompt.
prompt_All = {
    'introduction': 'Here is a list of Heart Rate data and a list of Blood Oxygen(SpO2 value) data of myself. Each point refers to a second.',
    'task': 'Please tell me the average blood oxygen and the average heart rate. Keep one decimal place. And give me analysis of health. Only two vitals is normal then the health is normal and output 0. Any abnormal of any vital should be considered abnormal and output 1. Then tell me why you have such judgment on the reasons part. Please consider the trend of the vital in time series as well. Please output as the format required: The average heart rate : XXX. The average blood oxygen : XXX. The health state : XXX. Suggestions: XXX. Reasons: XXX.',
    'background': 'A normal blood oxygen level varies between between 96% and 99% in healthy individuals.It is acceptable to continue home monitoring for 93%-95%. If the blood oxygen level is 92% or less, it is considered low and called hypoxemia,you would better seek advice from your GP. If the blood oxygen level is below 85%, it is considered critical and called hypoxia, you should seek medical advice immediately. If SpO2 for anytim is between 96% and 99%, health state is 0. If SpO2 for anytim is between 93% and 95%, health state is 1. If SpO2 for anytim is 92 or less, health state is 2. An ideal heart rate is between 50 and 90 beats per minute (bpm). It is acceptable to continue home monitoring for 101-109. If the heart rate is 110 to 130, you would better seek advice from your GP. If the heart rate is 140 or above, you should seek medical advice immediately. If the heart rate for anytime is under 100, health state is 0. If the heart rate for anytime is 100-130, health state is 1. If the heart rate for anytime is above 130, health state is 2. ',
    'output_format': 'The average heart rate : XXX. The average blood oxygen : XXX. The health state : XXX. Suggestions: XXX. Reasons: XXX.',
}

# Disabled BVP prompt, kept for reference.
# prompt_BVP = {'introduction':'Here is a list of Blood Volume Pulse data of myself. Each second refers to 20 points.','task':'Please tell me the average blood volume pulse of this subject. And give me analysis of health of the subject.'}
def glm_api(prompt_content):
    """Query the ZhipuAI ``chatglm_pro`` model with a single user message.

    Args:
        prompt_content: Text sent as the content of the only user turn.

    Returns:
        One string: the ``data`` of every event yielded by the response,
        concatenated in arrival order. Nothing is filtered out, so the data
        of "error", "interrupted" and "finish" events is included as well.
    """
    # pip install zhipuai
    zhipuai.api_key = "your-api-key"
    user_turn = {"role": "user", "content": prompt_content}
    stream = zhipuai.model_api.sse_invoke(
        model="chatglm_pro",
        prompt=[user_turn],
        temperature=0.9,
        top_p=0.7,
        incremental=True,
    )
    # Every event type contributes its payload in the same way, so no
    # per-type dispatch is required.
    return "".join(event.data for event in stream.events())
def gpt_api(prompt_content):
    """Query ``gpt-3.5-turbo`` through the OpenAI chat-completion endpoint.

    Args:
        prompt_content: Text sent as the content of the only user turn.

    Returns:
        The message content of the first choice in the completion.
    """
    # Imported lazily so the module can be loaded without the openai package.
    import openai

    openai.api_key = "your-api-key"
    openai.api_base = "your-api-link"

    conversation = [{"role": "user", "content": prompt_content}]
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=conversation,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
def gt_text(file_list, mode='All', chunk_len=60):
    """Build ground-truth labels and LLM prompt texts for every data window.

    Relies on module-level names: the HR_dict / SpO2_dict sample tables
    (keyed by file name), chunk_vitals, generate_gt and the prompt_HR /
    prompt_SpO2 / prompt_All templates.

    Args:
        file_list: File names to consider; only names ending in 'HR.csv'
            (modes 'HR' and 'All') or 'SpO2.csv' (mode 'SpO2') are used.
        mode: 'All' (HR + SpO2), 'HR' or 'SpO2'. Any other value yields
            empty lists.
        chunk_len: Window length forwarded to chunk_vitals.

    Returns:
        (ground_truth_list, text_data_list, mode). In the single-vital modes
        each ground truth is the value returned by generate_gt; in 'All'
        mode it is a string combining the HR and SpO2 results. The two lists
        are index-aligned.
    """
    print("mode:", mode)
    ground_truth_list = []
    text_data_list = []

    if mode == 'HR':
        for path in file_list:
            if not path.endswith('HR.csv'):
                continue
            print('file name:', path)
            samples = HR_dict[path]
            for window in chunk_vitals(samples, chunk_len):
                label = generate_gt(window, 'HR')
                request = str(prompt_HR) + '\nHR data: ' + str(window)
                ground_truth_list.append(label)
                text_data_list.append(request)

    elif mode == 'SpO2':
        for path in file_list:
            if not path.endswith('SpO2.csv'):
                continue
            print('file name:', path)
            samples = SpO2_dict[path]
            for window in chunk_vitals(samples, chunk_len):
                label = generate_gt(window, 'SpO2')
                request = str(prompt_SpO2) + '\nSpO2 data: ' + str(window)
                ground_truth_list.append(label)
                text_data_list.append(request)

    elif mode == 'All':
        for hr_path in file_list:
            if not hr_path.endswith('HR.csv'):
                continue
            # The matching SpO2 file shares the prefix: swap the trailing
            # 'HR.csv' (6 characters) for 'SpO2.csv'.
            spo2_path = hr_path[:-6] + 'SpO2.csv'
            print('file name:', hr_path, spo2_path)
            hr_samples = HR_dict[hr_path]
            spo2_samples = SpO2_dict[spo2_path]
            hr_windows = chunk_vitals(hr_samples, chunk_len)
            spo2_windows = chunk_vitals(spo2_samples, chunk_len)
            # zip stops at the shorter recording, so only windows present in
            # both signals are paired.
            for hr_window, spo2_window in zip(hr_windows, spo2_windows):
                hr_label = generate_gt(hr_window, 'HR')
                spo2_label = generate_gt(spo2_window, 'SpO2')
                label = 'HR: ' + str(hr_label) + '\n SpO2: ' + str(spo2_label)
                request = str(prompt_All) + '\n HR data: ' + str(hr_window) + '\n SpO2 data: ' + str(spo2_window)
                ground_truth_list.append(label)
                text_data_list.append(request)

    return ground_truth_list, text_data_list, mode
def extract_and_save_to_csv(origin_input, text, gt, csv_filename):
    """Parse one LLM answer and append it as a row to a CSV file.

    The answer must follow the SpO2 output format
    ("The average blood oxygen level : <x.y>. The health state : <n>.
    Suggestions: <...>. Reasons: <...>"). When it does not, a notice is
    printed and nothing is written.

    Args:
        origin_input: Prompt that produced the answer (stored verbatim).
        text: Raw answer returned by the model.
        gt: Ground truth associated with the prompt (stored verbatim).
        csv_filename: Destination file. Rows are appended; the header row is
            written first when the file does not exist yet or is empty.

    Raises:
        FileNotFoundError: If the parent directory of csv_filename is missing.
    """
    pattern = r'The average blood oxygen level : (\d+\.\d+). The health state : (\d+). Suggestions: (.+). Reasons: (.+)'
    match = re.search(pattern, text)
    if not match:
        print("No match found in the text.")
        return

    average_blood_oxygen, health_state, suggestions, reasons = match.groups()
    row = {
        "orginal_input": origin_input,
        "Average Blood Oxygen Level": average_blood_oxygen,
        "Health State": health_state,
        "Suggestions": suggestions,
        "Reasons": reasons,
        "ground_truth": gt,
    }

    # Append mode creates a missing file instead of raising, so the header
    # decision has to be made explicitly before opening.
    needs_header = not os.path.exists(csv_filename) or os.path.getsize(csv_filename) == 0

    # UTF-8 is pinned because model answers may contain non-ASCII text.
    with open(csv_filename, mode='a', newline='', encoding='utf-8') as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=list(row))
        if needs_header:
            writer.writeheader()
        writer.writerow(row)
def find_csv_files(directory):
    """Recursively list the vital-sign CSV files stored under *directory*.

    Args:
        directory: Root folder to walk.

    Returns:
        Paths (root joined with file name) of every file whose name ends in
        'BVP.csv', 'HR.csv' or 'SpO2.csv', in os.walk order.
    """
    wanted_suffixes = ('BVP.csv', 'HR.csv', 'SpO2.csv')
    return [
        os.path.join(root, name)
        for root, _dirs, names in os.walk(directory)
        for name in names
        if name.endswith(wanted_suffixes)
    ]


def chunk_vitals(vitals, length=60):
    """Split a recording into consecutive, non-overlapping windows.

    Args:
        vitals: Sliceable sequence of samples.
        length: Number of samples per window (e.g. 30, 60 or 120).

    Returns:
        A list of windows, each exactly *length* samples long. A trailing
        remainder shorter than *length* is dropped.
    """
    # The last valid start index is len(vitals) - length; every window is cut
    # with the requested length rather than a fixed 60.
    last_start = len(vitals) - length
    return [vitals[start:start + length] for start in range(0, last_start + 1, length)]


if __name__ == "__main__":
    # The tables below stay module-level globals (no main() function) because
    # gt_text() reads HR_dict and SpO2_dict from module scope.
    vital_path = 'demo_data'

    # Load all BVP, HR and SpO2 recordings.
    vital_csv_files = find_csv_files(vital_path)
    vital_csv_files.sort()

    HR_dict = {}
    SpO2_dict = {}
    BVP_dict = {}
    csv_files = vital_csv_files

    # (file-name suffix, CSV column, destination table)
    # NOTE(review): samples are truncated to int for all three signals; this
    # looks lossy for BVP -- confirm before BVP_dict is used.
    loaders = (
        ('HR.csv', 'hr', HR_dict),
        ('SpO2.csv', 'spo2', SpO2_dict),
        ('BVP.csv', 'bvp', BVP_dict),
    )
    for file in csv_files:
        for suffix, column, table in loaders:
            if file.endswith(suffix):
                table[file] = list(map(int, pd.read_csv(file)[column]))
                break

    # Quick sanity check of the windowing on one demo recording.
    print(SpO2_dict)
    vitals_test = SpO2_dict['demo_data/light_1/v01/SpO2.csv']
    vitals_chunks = chunk_vitals(vitals_test)
    for vital in vitals_chunks:
        print(vital)
        print(len(vital))

    # Ask both models about every SpO2 window and log the parsed answers.
    gt_list, prompt_list, mode = gt_text(csv_files, 'SpO2', 60)
    for prompt, gt in zip(prompt_list, gt_list):
        print('\n\nprompt:', prompt)
        glm_ans = glm_api(prompt_content=str(prompt))
        gpt_ans = gpt_api(prompt_content=str(prompt))
        print('\n\nGLM_Ans:', glm_ans)
        print('\n\nGBT_Ans:', gpt_ans)
        extract_and_save_to_csv(origin_input=prompt, text=glm_ans, gt=gt, csv_filename="/share/HealthLLM/glm_res.csv")
        extract_and_save_to_csv(origin_input=prompt, text=gpt_ans, gt=gt, csv_filename="/share/HealthLLM/gpt_res.csv")