-
Notifications
You must be signed in to change notification settings - Fork 63
/
Copy pathconfig.py-example
190 lines (160 loc) · 7.83 KB
/
config.py-example
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
""" 4CAT configuration """
import os
import configparser
# Location of the Docker shared configuration file; if this file exists and
# opts in (see the DOCKER section at the bottom of this file), its values
# override the defaults defined below
DOCKER_CONFIG_FILE = 'docker/shared/docker_config.ini'
# Data source configuration
# Maps a data source identifier to a dict of source-specific options.
# An empty dict enables the data source with its default settings; some
# sources take extra options (e.g. "boards" for reddit, "id_lookup" for
# twitterv2).
DATASOURCES = {
    "bitchute": {},
    "custom": {},
    "douban": {},
    "customimport": {},
    "parler": {},
    "reddit": {
        "boards": "*",  # presumably "*" allows all subreddits — verify in the reddit data source
    },
    "telegram": {},
    "twitterv2": {'id_lookup': False}
}
#####################
# Processor Options #
#####################
# download_images.py
# Upper bound on the number of images a single image-download job will fetch
MAX_NUMBER_IMAGES = 1000
# YouTube variables to use for processors
YOUTUBE_API_SERVICE_NAME = "youtube"  # Google API service name
YOUTUBE_API_VERSION = "v3"  # Google API version
YOUTUBE_DEVELOPER_KEY = ""  # supply your own developer key to enable YouTube processors
# Tumblr API keys to use for data capturing
TUMBLR_CONSUMER_KEY = ""
TUMBLR_CONSUMER_SECRET_KEY = ""
TUMBLR_API_KEY = ""
TUMBLR_API_SECRET_KEY = ""
# Reddit API keys
REDDIT_API_CLIENTID = ""
REDDIT_API_SECRET = ""
# tcat_auto_upload.py
# Credentials for a DMI-TCAT instance to which datasets can be uploaded;
# leave empty to disable the upload processor
TCAT_SERVER = ''
TCAT_TOKEN = ''
TCAT_USERNAME = ''
TCAT_PASSWORD = ''
# pix-plot.py
# If you host a version of https://github.com/digitalmethodsinitiative/dmi_pix_plot, you can use a processor to publish
# downloaded images into a PixPlot there
PIXPLOT_SERVER = ""
########################
# 4CAT General Options #
########################
# Configure how the tool is to be named in its web interface. The backend will
# always refer to '4CAT' - the name of the software, and a 'powered by 4CAT'
# notice may also show up in the web interface regardless of the value entered here.
TOOL_NAME = "4CAT"
TOOL_NAME_LONG = "4CAT: Capture and Analysis Toolkit"
# Postgres login details
DB_HOST = "localhost"
DB_PORT = 5432
DB_USER = "fourcat"
DB_NAME = "fourcat"
DB_PASSWORD = "supers3cr3t"  # replace this example password before deploying
# Path to folders where logs/images/data may be saved.
# Paths are relative to the folder this config file is in.
PATH_ROOT = os.path.abspath(os.path.dirname(__file__))  # better don't change this
PATH_LOGS = ""  # store logs here - empty means the 4CAT root folder
PATH_IMAGES = ""  # if left empty or pointing to a non-existent folder, no images will be saved
PATH_DATA = ""  # search results will be stored here as CSV files
PATH_LOCKFILE = "backend"  # the daemon lockfile will be saved in this folder. Probably no need to change!
PATH_SESSIONS = "sessions"  # folder where API session data is stored (e.g., Telegram)
# The following two options should be set to ensure that every analysis step can
# be traced to a specific version of 4CAT. This allows for reproducible
# research. You can however leave them empty with no ill effect. The version ID
# should be a commit hash, which will be combined with the Github URL to offer
# links to the exact version of 4CAT code that produced an analysis result.
# If no version file is available, the output of "git show" in PATH_ROOT will be used
# to determine the version, if possible.
PATH_VERSION = ".git-checked-out"  # file containing a commit ID (everything after the first whitespace found is ignored)
GITHUB_URL = "https://github.com/digitalmethodsinitiative/4cat"  # URL to the github repository for this 4CAT instance
# These settings control whether top-level datasets (i.e. those created via the
# 'Create dataset' page) are deleted automatically, and if so, after how much
# time. You can also allow users to cancel this (i.e. opt out). Note that if
# users are allowed to opt out, data sources can still force the expiration of
# datasets created through that data source. This cannot be overridden by the
# user.
EXPIRE_DATASETS = 0  # 0 or False-y to not expire; presumably a duration in seconds — confirm against the backend
EXPIRE_ALLOW_OPTOUT = True  # allow users to opt out of expiration
# 4CAT has an API (available from localhost) that can be used for monitoring
# and will listen for requests on the following port. "0" disables the API.
API_HOST = "localhost"
API_PORT = 4444
# 4CAT can anonymise author names in results and does so using a hashed version
# of the author name + a salt. The salt should be defined here. This should be
# a random string; in Python you can generate one with e.g. bcrypt.gensalt()
# You need to set this before running 4CAT. 4CAT will refuse to run if this is
# left at its default value.
ANONYMISATION_SALT = "REPLACE_THIS"
# Warning report configuration
WARN_INTERVAL = 600  # every so many seconds, compile a report of logged warnings and e-mail it to admins
WARN_LEVEL = "WARNING"  # only alerts above this level are mailed: DEBUG/INFO/WARNING/ERROR/CRITICAL
WARN_SLACK_URL = ""  # A Slack callback URL may be entered here; any warnings equal to or above
                     # WARN_LEVEL will be sent there immediately
# E-mail settings
# If your SMTP server requires login, define the MAIL_USERNAME and
# MAIL_PASSWORD variables here additionally.
WARN_EMAILS = []  # e-mail addresses to send warning reports to
ADMIN_EMAILS = []  # e-mail of admins, to send account requests etc to
MAILHOST = "localhost"  # SMTP server to connect to for sending e-mail alerts.
MAIL_SSL = False  # use SSL to connect to e-mail server?
MAIL_USERNAME = ""
MAIL_PASSWORD = ""
NOREPLY_EMAIL = "noreply@localhost"  # From: address for automated e-mail
# Scrape settings for data sources that contain their own scrapers
SCRAPE_TIMEOUT = 5  # how long to wait for a scrape request to finish?
SCRAPE_PROXIES = {"http": []}  # Items in this list should be formatted like "http://111.222.33.44:1234"
IMAGE_INTERVAL = 3600  # presumably seconds between image scrape runs — confirm against the scraper
# Explorer settings
# The maximum allowed amount of rows (prevents timeouts and memory errors)
MAX_EXPLORER_POSTS = 100000
# Web tool settings
class FlaskConfig:
    """Configuration consumed by the Flask-based 4CAT web interface."""
    # Module path of the Flask application object
    FLASK_APP = 'webtool/fourcat'
    # Key used to sign sessions; replace with a random string before deploying
    SECRET_KEY = "REPLACE_THIS"
    # Hostname (and port, when not using port 80) for absolute URLs;
    # if using a port other than 80, change to localhost:specific_port
    SERVER_NAME = 'localhost:5000'
    # Set to True to make 4CAT use "https" in absolute URLs
    SERVER_HTTPS = False
    # Only these hostnames may access the web tool; "*" or an empty
    # list matches everything
    HOSTNAME_WHITELIST = ["localhost"]
    # Hostnames matching these are exempt from rate limiting
    HOSTNAME_WHITELIST_API = ["localhost"]
    # presumably the label shown for whitelist-based access — confirm in the webtool
    HOSTNAME_WHITELIST_NAME = "Automatic login"
##########
# DOCKER #
##########
# Docker setup requires matching configuration for the following values
# These values will overwrite anything set previously in this config and
# originate from the .env file or the docker_config.ini file
if os.path.exists(DOCKER_CONFIG_FILE):
    config = configparser.ConfigParser()
    config.read(DOCKER_CONFIG_FILE)
    # Only apply the overrides when the Docker config explicitly opts in
    if config['DOCKER'].getboolean('use_docker_config'):
        # can be your server url or ip
        your_server = config['SERVER'].get('server_name', 'localhost')
        DB_HOST = config['DATABASE'].get('db_host')
        DB_PORT = config['DATABASE'].getint('db_port')
        DB_USER = config['DATABASE'].get('db_user')
        DB_NAME = config['DATABASE'].get('db_name')
        DB_PASSWORD = config['DATABASE'].get('db_password')
        API_HOST = config['API'].get('api_host')
        API_PORT = config['API'].getint('api_port')
        PATH_ROOT = os.path.abspath(os.path.dirname(__file__))  # better don't change this
        PATH_LOGS = config['PATHS'].get('path_logs', "")
        PATH_IMAGES = config['PATHS'].get('path_images', "")
        PATH_DATA = config['PATHS'].get('path_data', "")
        # NOTE(review): the fallbacks below ("") differ from the non-Docker
        # defaults above ("backend"/"sessions") — confirm this is intended
        PATH_LOCKFILE = config['PATHS'].get('path_lockfile', "")
        PATH_SESSIONS = config['PATHS'].get('path_sessions', "")
        ANONYMISATION_SALT = config['GENERATE'].get('anonymisation_salt')
        # Redefined here so the web tool picks up the Docker-derived server
        # name and secret key; this shadows the FlaskConfig defined above
        class FlaskConfig:
            FLASK_APP = 'webtool/fourcat'
            SECRET_KEY = config['GENERATE'].get('secret_key')
            # Omit the port from absolute URLs when serving on the default HTTP port
            if config['SERVER'].getint('public_port') == 80:
                SERVER_NAME = your_server
            else:
                SERVER_NAME = f"{your_server}:{config['SERVER'].get('public_port')}"
            SERVER_HTTPS = False  # set to true to make 4CAT use "https" in absolute URLs; DOES NOT CURRENTLY WORK WITH DOCKER SETUP
            HOSTNAME_WHITELIST = ["localhost", your_server]  # only these may access the web tool; "*" or an empty list matches everything
            HOSTNAME_WHITELIST_API = ["localhost", your_server]  # hostnames matching these are exempt from rate limiting
            HOSTNAME_WHITELIST_NAME = "Automatic login"