-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathconvert_climate_files.py
106 lines (97 loc) · 2.02 KB
/
convert_climate_files.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
"""Script to convert NOAA NCEI County Temp and Precipitation data to CSV or Parquet"""
import pandas as pd
import csv
# Lookup from the state code used in the NOAA file (key) to the state FIPS
# code (value); convert_to_df() uses it to derive the `state_fips` column.
#
# Codes 1-48 are listed positionally: the n-th entry of the tuple is the FIPS
# code for NOAA state code n.
noaa_code_to_fips_code_state = dict(
    enumerate(
        (
            1, 4, 5, 6, 8, 9, 10, 12,        # NOAA codes 1-8
            13, 16, 17, 18, 19, 20, 21, 22,  # NOAA codes 9-16
            23, 24, 25, 26, 27, 28, 29, 30,  # NOAA codes 17-24
            31, 32, 33, 34, 35, 36, 37, 38,  # NOAA codes 25-32
            39, 40, 41, 42, 44, 45, 46, 47,  # NOAA codes 33-40
            48, 49, 50, 51, 53, 54, 55, 56,  # NOAA codes 41-48
        ),
        start=1,
    )
)
# Code 50 sits outside the contiguous 1-48 run and maps to FIPS 2.
# NOTE(review): presumably this is Alaska - confirm against the NOAA docs.
noaa_code_to_fips_code_state[50] = 2
def convert_to_df(filename):
    """Parse a fixed-width NOAA NCEI county climate file into a DataFrame.

    Every input line is split into 16 fixed-width fields: a 2-character
    state code, a 3-character county code, a 2-character element code, a
    4-character year and twelve 7-character monthly values. The file is read
    without a header row.

    Args:
        filename: Path (or anything ``pandas.read_fwf`` accepts) of the
            fixed-width file to read.

    Returns:
        A DataFrame holding the 16 parsed columns plus three derived ones:
        ``state_fips`` (``state_code`` translated through
        ``noaa_code_to_fips_code_state``), ``fips_int``
        (``state_fips * 1000 + county_fips`` as an integer) and ``fips_str``
        (``fips_int`` zero-padded to five characters).

    Raises:
        ValueError: If the file contains a state code that has no entry in
            ``noaa_code_to_fips_code_state``.
    """
    column_names = [
        'state_code',
        'county_fips',
        'element',
        'year',
        'january',
        'february',
        'march',
        'april',
        'may',
        'june',
        'july',
        'august',
        'september',
        'october',
        'november',
        'december',
    ]
    df = pd.read_fwf(
        filename,
        widths=[2, 3, 2, 4, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7],
        header=None,
        names=column_names,
    )

    df['state_fips'] = df['state_code'].map(noaa_code_to_fips_code_state)

    # A state code missing from the lookup maps to NaN, which would otherwise
    # only surface below as an opaque "cannot convert NaN to integer" cast
    # error. Fail here instead, naming the offending codes.
    unmapped = df['state_fips'].isna()
    if unmapped.any():
        unknown_codes = df.loc[unmapped, 'state_code'].drop_duplicates().tolist()
        raise ValueError(
            f'{filename}: state code(s) {unknown_codes} have no FIPS mapping '
            'in noaa_code_to_fips_code_state'
        )

    df['fips_int'] = (df['state_fips'] * 1000 + df['county_fips']).astype(int)
    # Vectorised zero-padding to the 5-character state+county form.
    df['fips_str'] = df['fips_int'].astype(str).str.zfill(5)
    return df
if __name__ == '__main__':
    # Command-line entry point: convert one NOAA file and write the result
    # next to it, swapping the input's suffix for the chosen output format.
    import argparse
    from pathlib import Path

    cli = argparse.ArgumentParser()
    cli.add_argument('file_name')
    cli.add_argument(
        '-o',
        '--output-format',
        default='csv',
        choices=['csv', 'parquet'],
    )
    options = cli.parse_args()

    source_path = Path(options.file_name)
    frame = convert_to_df(source_path)

    # argparse's `choices` guarantees the format is one of these two keys.
    suffix_by_format = {'csv': '.csv', 'parquet': '.parquet'}
    target_path = source_path.with_suffix(suffix_by_format[options.output_format])

    if options.output_format == 'parquet':
        frame.to_parquet(target_path, index=False)
    else:
        # Quote every non-numeric field so string columns such as
        # `fips_str` are written as quoted text.
        frame.to_csv(target_path, index=False, quoting=csv.QUOTE_NONNUMERIC)