Skip to content

Commit b02ed0f

Browse files
committed
Replace streetviewfetch folder with new streetview_retrieval
1 parent 5b73558 commit b02ed0f

9 files changed

+1063
-0
lines changed
+38
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
""" dataset_convert.py
2+
Author: Po-Yu Hsieh ([email protected])
3+
Last update: 2019/05/02
4+
5+
"""
6+
import sys
7+
import tools.coordinate_conversion as corcv
8+
9+
def run():
    """ Convert the street and sidewalk datasets from the command line.

    Takes four positional command-line arguments: street input, sidewalk
    input, street output and sidewalk output. With any other argument count
    the usage line is printed and nothing is converted. Both datasets are
    converted from EPSG:6492 to EPSG:4326.
    """
    cli_args = sys.argv[1:]
    if len(cli_args) != 4:
        print("Usage: dataset_convert.py [STREETDB_IN] [SIDEWALKDB_IN] [STREETDB_OUT] [SIDEWALKDB_OUT]")
        return

    street_in, sidewalk_in, street_out, sidewalk_out = cli_args
    epsg_source, epsg_target = 6492, 4326  # EPSG codes

    # Street records additionally carry a mapping from the "GREENBOOK"
    # attribute to a "name" entry.
    street_params = dict(
        input_file=street_in,
        output_file=street_out,
        source_code=epsg_source,
        target_code=epsg_target,
        xy_coordinate=True,
        record_map=lambda rec: {"name": rec["GREENBOOK"]},
    )
    sidewalk_params = dict(
        input_file=sidewalk_in,
        output_file=sidewalk_out,
        source_code=epsg_source,
        target_code=epsg_target,
        xy_coordinate=True,
    )

    jobs = (
        ("Start converting street dataset coordinates...", street_params),
        ("Start converting sidewalk dataset coordinates...", sidewalk_params),
    )
    for banner, params in jobs:
        print(banner)
        corcv.CoordinateConversionToolset(**params).convert_shape_coordinates()
    print("Done!")


if __name__ == "__main__":
    run()

streetview_retrieval/dependencies.txt

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
requests
2+
pyshp
3+
rtree
4+
nvector
5+
geopy
6+
PyGeodesy
+43
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
import argparse
2+
import tools.sidewalk_partition as sp
3+
4+
def parse_args(argv=None):
    """ Parse the command-line options for sidewalk partition / query generation.

    Args:
        argv - (list of str, optional) Argument strings to parse. When None,
            argparse reads them from sys.argv[1:].

    Returns:
        (dict) Maps each option's destination name to its parsed value.
        Options that were not supplied (None) and an unset --verbose flag
        (False) are left out of the result.
    """
    # Implicit string concatenation replaces the backslash continuations,
    # which embedded the next line's indentation into the text; argparse
    # collapses whitespace when rendering, so the displayed help is the same.
    parser = argparse.ArgumentParser(
        description="Sidewalk partition and query parameter "
                    "generation routine for Boston StreetCaster project.")

    # Required arguments
    parser.add_argument("--sidewalkfile", dest="sidewalk_file", type=str, required=True,
                        help="File path to sidewalk JSON dataset.")
    parser.add_argument("--index_path", dest="index_path", type=str, required=True,
                        help="File path to store R-Tree index of street segments.")
    parser.add_argument("--out_path", dest="out_path", type=str, required=True,
                        help="Output folder path to store query parameters and metadata.")

    # Optional arguments
    parser.add_argument("--street_file", dest="street_file", type=str, required=False,
                        help="If R-tree index is not built, then this is the path for converted "
                             "street coordinate file used to construct spatial index.")
    parser.add_argument("--part_len", dest="part_len", type=float, required=False,
                        help="Partition lengh for sidewalk blocks (in meters).")
    parser.add_argument("--threshold", dest="threshold", type=float, required=False,
                        help="Threshod for identifying matching street segment (in degrees).")
    parser.add_argument("--shot_angle", dest="shot_angle", type=float, required=False,
                        help="Camera angle difference to sidwalk's direction (in degrees).")
    parser.add_argument("--shot_dist", dest="shot_dist", type=float, required=False,
                        help="Camera distance to sidewalk partition's center (in meters).")
    parser.add_argument("--verbose", dest="verbose", required=False, action="store_true",
                        help="Print out query parameter settings for each partition.")

    parsed = vars(parser.parse_args(argv))
    # Test for "not supplied" explicitly: a plain truthiness check would also
    # discard legitimate values such as `--shot_angle 0`.
    return {key: val for key, val in parsed.items()
            if val is not None and val is not False}
35+
36+
def run():
    """ Entry point: parse the command line, then run query generation.

    The parsed options are forwarded as keyword arguments to
    QueryGenerationRunner.
    """
    runner = sp.QueryGenerationRunner(**parse_args())
    runner.run()


if __name__ == "__main__":
    run()
+104
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
""" query_sampling.py
2+
3+
Author: Po-Yu Hsieh ([email protected])
4+
Last Update: 2019/05/02
5+
"""
6+
7+
import os
8+
import sys
9+
import json
10+
import csv
11+
from random import randint, sample
12+
from copy import deepcopy
13+
import tools.streetview_retrieval as sr
14+
15+
16+
def query_count(filepath):
    """ Count the number of lines in a text file.

    Args:
        filepath - (str) Path of the file to read.

    Returns:
        (int) Number of lines in the file (0 for an empty file).
    """
    # Iterate lazily instead of readlines() so that a large query file is
    # never held in memory just to be counted.
    with open(filepath, "r") as fd_r:
        return sum(1 for _ in fd_r)
20+
21+
def sample_runner(file_queries, file_meta, sample_size, credential_path, output_info=None, subsample=None):
    """ Reservoir-sample queries for which the street view lookup returns a panorama.

    Args:
        file_queries - (str) Path to the query file; each line is a JSON
            object with "location" and "heading" entries.
        file_meta - (str) Path to the metadata CSV, read row by row in
            lockstep with the query file.
        sample_size - (int) Number of queries to keep (reservoir size).
        credential_path - (str) Credential file path handed to
            StreetviewQueryToolset.
        output_info - (str, optional) Path the sampled records are written
            to, one JSON object per line. Nothing is written when omitted.
        subsample - (int, optional) When larger than sample_size, only this
            many randomly chosen rows are looked up at all.
    """
    print("Initialize search tool...")
    # NOTE(review): sample_retrive.py constructs the same class with
    # `verbosity=`; confirm which keyword StreetviewQueryToolset accepts.
    qrtool = sr.StreetviewQueryToolset(credential_path=credential_path, verbose=True)
    # Template for the metadata (availability) lookup, addressed by location.
    query_data_m = {
        "location": "",
        "size": "640x420",
        "heading": 0,
        "fov": 90,
        "pitch": 0,
        "radius": 20,
        "source": "outdoor"
    }
    # Template for the stored image query, addressed by panorama id.
    query_data_s = {
        "pano": "",
        "size": "640x420",
        "heading": 0,
        "fov": 90,
        "pitch": 0,
        "radius": 20,
        "source": "outdoor"
    }

    k = sample_size
    candidate_rows = None
    if subsample and subsample > k:
        total_rows = query_count(file_queries)
        # random.sample raises ValueError when asked for more items than the
        # population holds, so clamp; a set gives O(1) membership tests below.
        candidate_rows = set(sample(range(total_rows), min(subsample, total_rows)))

    # The reservoir grows up to k entries, so it never contains empty slots
    # when fewer than k rows turn out to be valid.
    buckets = []
    stream_ct = 0

    print("Sampling...")
    with open(file_meta, newline='') as meta_csv, open(file_queries) as query_file:
        m_reader = csv.DictReader(meta_csv)
        for row_id, (m_dict, q_line) in enumerate(zip(m_reader, query_file)):
            # Step 0: Work under subsampling
            if candidate_rows is not None and row_id not in candidate_rows:
                continue
            # Step 1: Form parameters and check data availability
            print("Handling data id: {}".format(row_id))
            query_json = json.loads(q_line)
            # The stored coordinate pair is reversed before being sent.
            query_data_m["location"] = "{0:f},{1:f}".format(*(query_json["location"][::-1]))
            query_data_m["heading"] = query_json["heading"]
            pano_id = qrtool.get_meta(query_data_m)
            if not pano_id:
                continue

            query_data_s["pano"] = pano_id
            query_data_s["heading"] = query_json["heading"]
            # Step 2: The first k valid rows fill the reservoir; every later
            # valid row replaces a random slot with probability k / (n + 1).
            slot = stream_ct if stream_ct < k else randint(0, stream_ct)
            if slot < k:
                record = {"row_id": row_id, "meta": m_dict, "query": deepcopy(query_data_s)}
                if slot == len(buckets):
                    buckets.append(record)
                else:
                    buckets[slot] = record
            # Increment counter
            stream_ct += 1

    # Step 3: Store the sampled queries, one JSON record per line
    print("Query based on sample result...")
    if output_info:
        with open(output_info, "w") as fd_w:
            fd_w.writelines("{}\n".format(json.dumps(q_item)) for q_item in buckets)
    print("Done!")
90+
91+
def run():
    """ Command-line entry point for query sampling.

    Expects five positional arguments (query file, metadata file, sample
    size, credential path, output path) and an optional sixth one giving the
    subsample size. Prints the usage line and returns when the number of
    arguments does not match.
    """
    cli_args = sys.argv[1:]
    # Five required values plus one optional value, not counting the script
    # name itself.
    if not 5 <= len(cli_args) <= 6:
        print("Usage: python3.7 query_sampling.py QUERY_FILE META_FILE SAMPLE_SIZE AUTH_PATH OUTPUT_INFO_PATH ([SUBSAMPLE_SIZE])")
        return
    file_queries, file_meta, sample_size, credential_path, output_info = cli_args[:5]
    sample_size = int(sample_size)
    subsample = int(cli_args[5]) if len(cli_args) == 6 else None
    sample_runner(file_queries, file_meta, sample_size, credential_path, output_info, subsample)


if __name__ == "__main__":
    run()

streetview_retrieval/readme.md

+104
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
# Sidewalk Dataset Preprocessing
2+
3+
In this document we describe how we preprocess the datasets we were given and derive what we need to query Google's [street view API](https://developers.google.com/maps/documentation/streetview/intro) for sidewalk images.
4+
5+
## About Dataset and Resource
6+
7+
We have our sidewalk and street data from Boston Hack 2018's dataset. The whole dataset can be obtained from project description [here](https://docs.google.com/document/d/1jS3QsgjQLZyYoZzs0WbrA_SrOWAhEUv6Cc_a8X0oHJA/).
8+
9+
The street dataset consists of line segments of streets in Boston; and sidewalk contains shape polygons of planned sidewalk blocks. Both datasets use coordinates under *EPSG:6492*'s ([description](https://epsg.io/6492)) projection settings.
10+
11+
### Coordinate Conversion
12+
13+
Because Google's street view API only accepts *WGS84* coordinates (the worldwide coordinate system, [ref](https://en.wikipedia.org/wiki/World_Geodetic_System)) while the dataset uses another coordinate system, we need to transform location coordinates beforehand. There are several Python libraries supporting coordinate conversion, such as `pyproj`. However, we found that the transformation results computed on a local machine suffer from significant numerical error and can't be used directly in any further task.
14+
15+
To address this issue, we use [epsg.io](http://epsg.io/)'s free API to obtain coordinates with higher precision ([API description](https://github.com/klokantech/epsg.io)). We wrote a function that converts a series of locations from one projection system to the other for this task.
16+
17+
### Sidewalk Block Partition
18+
19+
Most of the sidewalk blocks are too long to be included in a single image, so we separate each block into multiple partitions of similar length and retrieve images for them separately. The next step is therefore to determine how many photos should be taken along the sidewalk, along with the location of each partition. We designed an algorithm that approximates the sidewalk with a simpler shape, then partitions the simplified shape to obtain the required partitions.
20+
21+
The approximation algorithm is based on the assumption that most of the blocks are nearly rectangular. First, we approximate the block as a quadrilateral defined by the points that attain the extreme values (maximum and minimum) in both coordinate directions. Then, we take the longest side of the quadrilateral to decide the number of partitions and the main heading direction of the block. Finally, we partition the longest edge together with the edge across from it, and take the midpoints between the evenly spaced points on both sides as the partitions' centers.
22+
23+
### Street Matching and Camera Settings
24+
25+
The main problem is that the sidewalk dataset doesn't provide any information about which road a sidewalk belongs to. We also need to know the relative position of street and sidewalk to make sure the camera is set up on the correct side of the target sidewalk in order to obtain a valid result. In spatial database applications, the most basic approach to various kinds of search operations (such as intersection and nearest-neighbor search) is to build an index over the rectangular bounding boxes of all items, then work on the relationships between bounding boxes before analyzing the detailed shapes inside. Another useful clue for matching a street segment to a sidewalk is the angle difference between their heading directions, since ideally they're nearly parallel to each other.
26+
27+
So this is how we deal with this case. First, we build an R-tree index for all line segments in the street dataset, then use the center of the target sidewalk partition to run a nearest-neighbor query (on bounding boxes) for possible candidates, and finally use the angle difference to find the segment that fits best.
28+
29+
### Obtaining Sidewalk Images from Google's Street View API
30+
31+
Other than solving for the camera heading, we need to decide other factors such as camera position, pitch rotation, and zoom for querying street view images, while guaranteeing that the area we want appears large enough in the resulting image. However, obtaining the right camera settings is a research problem of its own, far beyond our main goal, so we simply place every camera at a fixed distance from the partition center and use the default values of Google's API for the other settings.
32+
33+
### Future Work
34+
35+
We're told that the original dataset for sidewalks ([link](https://data.boston.gov/dataset/sidewalk-inventory)) has more attributes that are crucial for refining all of the above tasks, such as the street each sidewalk belongs to and the sidewalk width. Future updates will incorporate this dataset so that we can obtain more accurate camera positions and settings for the target.
36+
37+
Also, the assumptions we used for generating camera settings can fail in some cases, and there is still a lot of room for improvement, such as approximating parameters other than position and heading for the query.
38+
39+
### Dependencies
40+
41+
All the programs are developed under Python 3.7 and require these external libraries in addition to the built-ins:
42+
43+
* `requests`, library for HTTP requests
44+
* `pyshp`, library for reading Shapefile
45+
* `rtree`, wrapper library for [libspatialindex](https://libspatialindex.org/) to build up R-tree index.
46+
* `nvector`, library that provides tools for solving common geographical questions
47+
* `geopy`, library for geocoding and distance computation
48+
* `PyGeodesy`, library for geodesy operations
49+
50+
First, run this command to install all dependencies:
51+
52+
```
53+
python3.7 -m pip install -r dependencies.txt
54+
```
55+
56+
Then follow [this](http://toblerity.org/rtree/install.html) instruction to have `libspatialindex` installed. For macOS users (which is not covered from official website), run `brew install spatialindex` through `homebrew` to install this package.
57+
58+
### How to run
59+
#### Coordinate Conversion
60+
61+
Say you have your Shapefiles for street and sidewalk in directories `ST_DB` and `SW_DB`, and want to output the converted result to `ST_OUT_PATH`, `SW_OUT_PATH`, respectively. Then you can run this command to convert both datasets:
62+
```
63+
python3 dataset_convert.py "ST_DB" "SW_DB" "ST_OUT_PATH" "SW_OUT_PATH"
64+
```
65+
66+
#### Partitioning, Query Generation and Obtain Images
67+
68+
To partition the converted sidewalk dataset and generate query parameters, use `query_generation.py` and run:
69+
70+
```
71+
python3.7 query_generation.py [YOUR_PARAMETERS...]
72+
```
73+
74+
For details about parameters, check them out by running:
75+
76+
```
77+
python3.7 query_generation.py -h
78+
```
79+
80+
Running `query_generation` will output three text files:
81+
- `sidewalk_info.txt`: Information of sidewalk blocks, mainly about how many partitions are made for this block for querying.
82+
- `metadata.txt`: Metadata for partition queries, including their center position, heading direction and the matched street segment.
83+
- `queries.txt`: Query parameters for Google street view API (in JSON string format)
84+
85+
As for obtaining sidewalk images for all of the generated queries, we don't have a solution yet. Since Google's street view API is a paid service, users may prefer not to request all images at once.
86+
87+
However, if you'd like to sample from the set of all queries, you can try `sample_retrive.py` and `query_sampling.py`, which sample from the output above, keep only the queries with a valid result from Google's API, and retrieve street view images for the samples.
88+
89+
Google's API requires a product key and a signature secret for forming queries, so you need to store your authentication credentials in a JSON file with this format:
90+
91+
```json
92+
{
93+
"key": "[Your Google API key here]",
94+
"secret": "[And your API signature secret here]"
95+
}
96+
```
97+
98+
Suppose that you store all outputs from `query_generation` under `partitions` directory, and store the keys in `credentials.json`. Then running the following commands will randomly select 100 valid queries (out of 1000 samples) and store images obtained to `sample_output` directory (with *test_sample_* as filename prefix):
99+
100+
```
101+
python3.7 query_sampling.py partitions/queries.txt partitions/metadata.txt 100 credentials.json partitions/samples.txt 1000
102+
python3.7 sample_retrive.py partitions/samples.txt credentials.json sample_output test_sample_ [-v]
103+
```
104+
+49
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
""" sample_retrive.py
2+
3+
Author: Po-Yu Hsieh ([email protected])
4+
Last Update: 2019/05/02
5+
"""
6+
7+
import os, sys
8+
import json
9+
import tools.streetview_retrieval as sr
10+
11+
def fetch_sidewalk_images(query_file, auth_path, output_path, tag, verbosity=False, max_retries=5):
    """ Request a street view image for every sampled query in a file.

    Args:
        query_file - (str) File path for query information; each line is a
                     JSON object whose "query" entry holds the request
                     parameters.
        auth_path - (str) File path for Google API's auth keys.
        output_path - (str) Output path for returned images.
        tag - (str) Prefix for output images
        verbosity - (bool) Verbosity settings for StreetviewQueryToolset
        max_retries - (int) Number of attempts per query (at least one)
                      before the query is skipped.
    """
    # NOTE(review): query_sampling.py constructs the same class with
    # `verbose=`; confirm which keyword StreetviewQueryToolset accepts.
    qrtool = sr.StreetviewQueryToolset(credential_path=auth_path, verbosity=verbosity)
    attempts = max(1, max_retries)
    with open(query_file, "r") as fd_r:
        for line_no, line in enumerate(fd_r, start=1):
            if not line.strip():
                continue
            record = json.loads(line)
            if not record:
                # A sample file may contain "null" placeholder lines.
                continue
            query_params = record["query"]
            # Requests can fail transiently, so retry, but only a bounded
            # number of times. `except Exception` (unlike a bare except)
            # lets Ctrl-C interrupt the loop.
            for attempt in range(1, attempts + 1):
                try:
                    qrtool.get_streetview(output_path, query_params, tag, False)
                except Exception as err:
                    print("Query on line {} failed (attempt {}/{}): {}".format(
                        line_no, attempt, attempts, err), file=sys.stderr)
                else:
                    break
            else:
                print("Skipping query on line {} after {} failed attempts.".format(
                    line_no, attempts), file=sys.stderr)
33+
34+
def run():
    """ Command-line entry point for retrieving sampled street view images.

    Expects four positional arguments (query path, auth path, output path,
    file prefix) and an optional "-v" flag that turns on verbosity. Prints
    the usage text and returns when the arguments do not match.
    """
    arguments = sys.argv[1:]
    # Pull the optional flag out first so that only the four positional
    # values remain to be validated and unpacked.
    verbosity = "-v" in arguments
    if verbosity:
        arguments.remove("-v")
    if len(arguments) != 4:
        print("Usage:\n python3.7 sample_retrive.py QUERY_PATH AUTH_PATH OUT_PATH FILE_PREFIX [-v]")
        return
    query_file, auth_path, out_path, tag = arguments
    fetch_sidewalk_images(query_file, auth_path, out_path, tag, verbosity)


if __name__ == "__main__":
    run()

0 commit comments

Comments
 (0)