-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathmake-dataset.py
74 lines (67 loc) · 3.14 KB
/
make-dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import argparse
import glob
import numpy as np
import os
import pandas as pd
import rasterio
from rasterio.warp import transform_bounds
from pyproj import CRS
import shutil
import tarfile
from tqdm import tqdm
from helpers import tile256
def _extract_tifs(archive_path, dest_dir, desc):
    """Extract every ``.tif`` member of a tar archive into *dest_dir*.

    Args:
        archive_path: Path of the tar archive to read.
        dest_dir: Directory the members are extracted into; each member
            keeps its path from inside the archive beneath this directory.
        desc: Label shown on the progress bar.
    """
    with tarfile.open(archive_path) as tar:
        print(f'Reading file {os.path.basename(archive_path)}')
        # Use the 'data' extraction filter when this Python provides it, so
        # members with absolute paths or ".." components are refused instead
        # of being written outside dest_dir. Older interpreters do not accept
        # the keyword, hence the feature check.
        extract_kwargs = {'filter': 'data'} if hasattr(tarfile, 'data_filter') else {}
        for member in tqdm(tar.getmembers(), position=0, leave=True, desc=desc):
            if member.name.endswith('.tif'):
                tar.extract(member, dest_dir, **extract_kwargs)


def _tile_all(img_paths, out_dir, desc):
    """Call ``tile256`` on each path in *img_paths* with *out_dir* as target.

    Args:
        img_paths: Raster file paths to tile.
        out_dir: Directory passed to ``tile256`` as its second argument
            (presumably where the tiles are written -- see helpers.tile256).
        desc: Label shown on the progress bar.
    """
    for img_path in tqdm(img_paths, position=0, leave=True, desc=desc):
        tile256(img_path, out_dir)


def main(args):
    """Extract the S1 chip and label archives, tile them, then clean up.

    Steps:
        1. Create the ``data`` and ``models`` folders under the project dir.
        2. Extract the ``.tif`` members of both archives into ``data``.
        3. Pass every VV chip, VH chip and water label to ``tile256``.
        4. Delete the extracted ``c2s*`` folders from ``data``.

    Args:
        args: Parsed command-line namespace; the attributes read here are
            ``proj_dir``, ``chips`` and ``labels``.
    """
    data_dir = os.path.join(args.proj_dir, 'data')

    # Creating project folder structure
    for dirc in ('data', 'models'):
        os.makedirs(os.path.join(args.proj_dir, dirc), exist_ok=True)

    _extract_tifs(args.chips, data_dir, "S1 chips extraction")
    _extract_tifs(args.labels, data_dir, "S1 water labels extraction")

    # Getting S1 VV, VH and water label chip file paths.
    # The patterns contain no '**', so glob's recursive flag is not needed.
    source_dir = os.path.join(data_dir, 'c2smsfloods_v1_source_s1')
    label_dir = os.path.join(data_dir, 'c2smsfloods_v1_labels_s1_water')
    s1_vv_img_path_list = sorted(glob.glob(os.path.join(source_dir, '*/', '*VV.tif')))
    s1_vh_img_path_list = sorted(glob.glob(os.path.join(source_dir, '*/', '*VH.tif')))
    s1_img_label_path_list = sorted(glob.glob(os.path.join(label_dir, '*/', '*.tif')))

    # Convert to 256x256 tiles
    _tile_all(s1_vv_img_path_list, os.path.join(data_dir, 'chips', 'VV'), "Tiling VV chips")
    _tile_all(s1_vh_img_path_list, os.path.join(data_dir, 'chips', 'VH'), "Tiling VH chips")
    _tile_all(s1_img_label_path_list, os.path.join(data_dir, 'labels'), "Tiling water labels")

    # The tiles are the final product; drop the extracted originals.
    print('Removing original c2smsfloods data')
    for dirc in glob.glob(os.path.join(data_dir, 'c2s*')):
        shutil.rmtree(dirc)
    print('Process completed')
if __name__ == "__main__":
    # Command-line entry point: collect the project directory and the two
    # archive paths, then hand them to main().
    parser = argparse.ArgumentParser(
        # The description reflects what main() does: extract the archives and
        # tile the rasters (no CSV file is produced by this script).
        description=(
            'Extracting the Cloud to Street - Microsoft flood dataset '
            '(Sentinel-1) chips and water labels and tiling them into '
            '256*256 tiles'
        ),
    )
    parser.add_argument(
        "--proj_dir",
        default=os.getcwd(),  # falls back to the current working directory
        type=str,
        help="Project directory location",
    )
    parser.add_argument(
        "--chips",
        type=str,
        required=True,
        help="file path for S1 chip data (tar.gz)",
    )
    parser.add_argument(
        "--labels",
        type=str,
        required=True,
        help="file path for S1 label data (tar.gz)",
    )
    args = parser.parse_args()
    main(args)