-
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtwitter_data_extractor.py
149 lines (124 loc) · 4.13 KB
/
twitter_data_extractor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
from argparse import ArgumentParser
from exceptions import (
TwitterAPISetupError,
UnsupportedExtractorError,
TwitterDataExtractorException,
MissingUsernameParameterError,
UserNotFoundError,
PrivateAccountError,
UnsupportedReporterError,
ExtractorDatabaseError,
MissingShareMailError,
)
from factory.extractor_factory import ExtractorFactory
from factory.reporter_factory import ReporterFactory
from twitter_api_service import TwitterAPIService
from utils import logger
# Module author metadata.
# NOTE(review): the e-mail address below looks like a web-page e-mail-protection
# placeholder rather than a real address — confirm and restore the actual contact.
__author__ = "Coşkun Deniz <[email protected]>"
def handle_exception(exp: TwitterDataExtractorException) -> None:
    """Log the error message and terminate the program with a failure status

    This function never returns normally: ``SystemExit`` is always raised and
    chained to ``exp`` so the original error stays available as ``__cause__``.

    :type exp: TwitterDataExtractorException
    :param exp: Exception raised by the data extractor/reporter components
    :raises SystemExit: Always, with exit status 1
    """
    logger.error(exp)
    # A bare SystemExit() carries the code None, which the interpreter reports
    # as exit status 0 (success). Pass 1 so shells and schedulers running the
    # tool can tell that it failed.
    raise SystemExit(1) from exp
def get_arg_parser() -> ArgumentParser:
    """Build the command line parser of the tool

    Every option is described once in ``option_specs`` as a pair of
    (option strings, ``add_argument`` keyword arguments). The options are
    registered in the listed order, which is also the order they are added
    to the parser.

    :rtype: ArgumentParser
    :returns: Parser configured with all supported command line options
    """
    option_specs = (
        (
            ("-c", "--useconfig"),
            dict(action="store_true", help="Read configuration from config.json file"),
        ),
        (
            ("-cf", "--configfile"),
            dict(default="config.json", help="Read configuration from given file"),
        ),
        (
            ("--forme",),
            dict(
                action="store_true",
                help="Determine API user(account owner or on behalf of a user)",
            ),
        ),
        (
            ("-u", "--user"),
            dict(help="Extract user data for the given username"),
        ),
        (
            ("-ul", "--users"),
            dict(help="Extract user data for the given comma separated usernames"),
        ),
        (
            ("-fr", "--friends"),
            dict(action="store_true", help="Extract friends data for the given username"),
        ),
        (
            ("-fl", "--followers"),
            dict(action="store_true", help="Extract followers data for the given username"),
        ),
        (
            ("-ut", "--user_tweets"),
            dict(action="store_true", help="Extract tweets of user with the given username"),
        ),
        (
            ("-s", "--search"),
            dict(help="Extract latest tweets for the given search keyword"),
        ),
        (
            ("-tc", "--tweet_count"),
            dict(type=int, help="Limit the number of tweets gathered"),
        ),
        (
            ("-e", "--excludes"),
            dict(
                default="retweets",
                help=(
                    "Fields to exclude from tweets queried as comma separated values "
                    "(replies,retweets)"
                ),
            ),
        ),
        (
            ("-ot", "--output_type"),
            dict(
                default="xlsx",
                help="Output file type (csv, xlsx, gsheets, mongodb or sqlite)",
            ),
        ),
        (
            ("-of", "--output_file"),
            dict(default="results.xlsx", help="Output file name"),
        ),
        (
            ("-sm", "--share_mail"),
            dict(help="Mail address to share Google Sheets document"),
        ),
    )

    parser = ArgumentParser()
    for option_strings, settings in option_specs:
        parser.add_argument(*option_strings, **settings)
    return parser
def main(args) -> None:
    """Run the extract-and-report pipeline for the parsed command line options

    The stages run in order: API access setup, extractor selection, data
    extraction, reporter selection and saving. Each stage is guarded on its
    own, and the errors expected from it are passed to ``handle_exception``,
    which logs them and raises ``SystemExit``.

    :type args: Namespace
    :param args: Command line args returned by ArgumentParser
    """
    # Errors each multi-error stage is expected to raise.
    extraction_errors = (MissingUsernameParameterError, UserNotFoundError)
    reporter_setup_errors = (
        UnsupportedReporterError,
        ExtractorDatabaseError,
        MissingShareMailError,
    )
    saving_errors = (PrivateAccountError, ExtractorDatabaseError)

    # Stage 1: create the API service and set up its access.
    try:
        twitter_api = TwitterAPIService(args.forme)
        twitter_api.setup_api_access()
    except TwitterAPISetupError as setup_error:
        handle_exception(setup_error)

    # Stage 2: pick the extractor matching the command line options.
    try:
        data_extractor = ExtractorFactory.get_extractor(args)
    except UnsupportedExtractorError as extractor_error:
        handle_exception(extractor_error)

    # Stage 3: extract the data through the API service.
    try:
        results = data_extractor.extract_data(twitter_api)
    except extraction_errors as extraction_error:
        handle_exception(extraction_error)

    # Stage 4: pick the reporter matching the command line options.
    try:
        data_reporter = ReporterFactory.get_reporter(args)
    except reporter_setup_errors as reporter_error:
        handle_exception(reporter_error)

    # Stage 5: hand the extracted data to the reporter.
    try:
        data_reporter.save(results)
    except saving_errors as saving_error:
        handle_exception(saving_error)
if __name__ == "__main__":
    # Parse the command line before entering the try block, so only the run
    # itself is covered by the Ctrl+C handler below.
    args = get_arg_parser().parse_args()
    try:
        main(args)
    except KeyboardInterrupt:
        # The user interrupted the run: log it instead of showing a traceback.
        logger.info("Program ended manually.")