-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfilter_us_tweets.py
39 lines (33 loc) · 1023 Bytes
/
filter_us_tweets.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import sys
import ast
import json
###########################################################
# filter_us_tweets.py
#
# Purpose:
# keep only the tweets located in the US; all other tweets are dropped.
# filter logic is the following:
# tweet.place.country_code == 'US' (compared case-insensitively)
#
# Requirement:
# Python3.7+
#
# Example:
# gunzip -c coronavirus_12-27-2020.gz | python3 filter_us_tweets.py >> coronavirus_12-27-2020_us.json
#
# Author:
# Qiushi Bai ([email protected])
###########################################################
def is_us_tweet(tweet_dict):
    """Return True if the tweet's place carries the US country code.

    The test is ``tweet_dict['place']['country_code']`` compared
    case-insensitively against ``'us'``.

    Args:
        tweet_dict: A single tweet parsed into a dict.

    Returns:
        True when the place's country code is 'US' in any letter case.
        False when 'place' is missing, None or not a dict, or when its
        'country_code' is missing, not a string, or a different country.
    """
    place = tweet_dict.get('place')
    # Covers a missing key, an explicit None and any non-dict value, none of
    # which can hold a country code.
    if not isinstance(place, dict):
        return False

    country_code = place.get('country_code')
    # A None or otherwise non-string country code has no .lower(); treat it
    # as "not US" instead of raising AttributeError.
    if not isinstance(country_code, str):
        return False

    return country_code.lower() == 'us'
if __name__ == '__main__':
    # Read tweets from stdin one line at a time and write the US ones to
    # stdout, one JSON document per line.
    for line in sys.stdin:
        # ast.literal_eval raises SyntaxError on an empty or whitespace-only
        # line, so a blank line (e.g. at the end of the input) would abort
        # the whole run; skip it instead.
        if not line.strip():
            continue
        # Each line is parsed as a Python literal, not as JSON.
        tweet_dict = ast.literal_eval(line)
        if is_us_tweet(tweet_dict):
            print(json.dumps(tweet_dict))