Skip to content

Commit 1c07fcc

Browse files
committed
Digikala crawler project initialization
0 parents  commit 1c07fcc

40 files changed

+1123
-0
lines changed

.gitignore

+113
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
# Created by https://www.gitignore.io
2+
3+
### OSX ###
4+
.DS_Store
5+
.AppleDouble
6+
.LSOverride
7+
8+
# Icon must end with two \r
9+
Icon
10+
11+
12+
# Thumbnails
13+
._*
14+
15+
# Files that might appear on external disk
16+
.Spotlight-V100
17+
.Trashes
18+
19+
# Directories potentially created on remote AFP share
20+
.AppleDB
21+
.AppleDesktop
22+
Network Trash Folder
23+
Temporary Items
24+
.apdisk
25+
26+
27+
### Python ###
28+
# Byte-compiled / optimized / DLL files
29+
__pycache__/
30+
*.py[cod]
31+
32+
# C extensions
33+
*.so
34+
35+
# Distribution / packaging
36+
.Python
37+
env/
38+
venv/
39+
build/
40+
develop-eggs/
41+
dist/
42+
downloads/
43+
eggs/
44+
lib/
45+
lib64/
46+
parts/
47+
sdist/
48+
var/
49+
*.egg-info/
50+
.installed.cfg
51+
*.egg
52+
53+
# PyInstaller
54+
# Usually these files are written by a python script from a template
55+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
56+
*.manifest
57+
*.spec
58+
59+
# Installer logs
60+
pip-log.txt
61+
pip-delete-this-directory.txt
62+
63+
# Unit test / coverage reports
64+
htmlcov/
65+
.tox/
66+
.coverage
67+
.cache
68+
nosetests.xml
69+
coverage.xml
70+
71+
# Translations
72+
*.mo
73+
*.pot
74+
75+
# Sphinx documentation
76+
docs/_build/
77+
78+
# PyBuilder
79+
target/
80+
81+
82+
### Django ###
83+
*.log
84+
*.pot
85+
*.pyc
86+
__pycache__/
87+
local_settings.py
88+
.vscode
89+
90+
.env
91+
db.sqlite3
92+
93+
collectstatic/
94+
.dccache
95+
packages.cmd
96+
**/migrations/*
97+
!**/migrations/__init__.py
98+
media/upload/**
99+
!media/upload/*
100+
uploads/
101+
.confidential
102+
103+
Pipfile
104+
Pipfile.lock
105+
106+
.dockerignore
107+
entrypoint.sh
108+
Dockerfile
109+
docker-compose.yml
110+
111+
!docs/**
112+
113+
painless/utils/sms/token.txt

README.md

+50
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
# DigiKala Crawler
2+
3+
This is a Django app for scraping data from specified Digikala search URLs.
4+
5+
**Install dependencies:**
6+
7+
`pip install -r requirements.txt`
8+
9+
**Make migrations:**
10+
11+
`python manage.py makemigrations`
12+
13+
**Migrate:**
14+
15+
`python manage.py migrate`
16+
17+
**Run project:**
18+
19+
`python manage.py runserver`
20+
21+
# API Guide
22+
23+
- **URL**
24+
25+
<http://localhost:8000/api/category/>
26+
27+
28+
- **Method**
29+
30+
<_The request type_>
31+
32+
`POST`
33+
34+
35+
- **Data Params**
36+
37+
- required:
38+
39+
Digikala search URL, such as:
40+
41+
`category=https://www.digikala.com/search/?q=%D8%B4%DB%8C%D8%A7%D8%A6%D9%88%D9%85%DB%8C`
42+
43+
44+
- optional:
45+
46+
number of pages that you want to crawl:
47+
48+
`pages=15`
49+
50+
(default is 5)

kernel/__init__.py

Whitespace-only changes.

kernel/asgi.py

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
import os
2+
3+
from django.core.asgi import get_asgi_application
4+
5+
# Default settings module for ASGI; an already-set DJANGO_SETTINGS_MODULE wins.
# NOTE(review): kernel/settings/ is a package split into base/development/
# production modules -- confirm kernel/settings/__init__.py selects one of
# them, otherwise point this at e.g. 'kernel.settings.production'.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'kernel.settings')
6+
7+
application = get_asgi_application()

kernel/settings/base.py

+71
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
from decouple import config
2+
3+
if config('DEBUG', cast=bool):
4+
import mimetypes
5+
mimetypes.add_type("application/javascript", ".js", True)
6+
7+
MIDDLEWARE = [
8+
'django.middleware.security.SecurityMiddleware',
9+
'django.contrib.sessions.middleware.SessionMiddleware',
10+
'django.middleware.common.CommonMiddleware',
11+
'django.middleware.csrf.CsrfViewMiddleware',
12+
'django.contrib.auth.middleware.AuthenticationMiddleware',
13+
'django.contrib.messages.middleware.MessageMiddleware',
14+
'django.middleware.clickjacking.XFrameOptionsMiddleware',
15+
]
16+
17+
AUTHENTICATION_BACKENDS = [
18+
'django.contrib.auth.backends.AllowAllUsersModelBackend'  # NOTE(review): per Django docs this backend does not reject inactive users -- confirm that is intended
19+
]
20+
21+
ROOT_URLCONF = 'kernel.urls'
22+
23+
TEMPLATES = [
24+
{
25+
'BACKEND': 'django.template.backends.django.DjangoTemplates',
26+
'APP_DIRS': True,
27+
'OPTIONS': {
28+
'context_processors': [
29+
'django.template.context_processors.debug',
30+
'django.template.context_processors.request',
31+
'django.contrib.auth.context_processors.auth',
32+
'django.contrib.messages.context_processors.messages',
33+
],
34+
},
35+
},
36+
]
37+
38+
WSGI_APPLICATION = 'kernel.wsgi.application'
39+
40+
AUTH_PASSWORD_VALIDATORS = [
41+
{
42+
'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator',
43+
},
44+
{
45+
'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator',
46+
},
47+
{
48+
'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator',
49+
},
50+
{
51+
'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator',
52+
},
53+
]
54+
55+
##############################
56+
# INTERNATIONALIZATION #
57+
##############################
58+
59+
TIME_ZONE = 'UTC'
60+
61+
USE_I18N = True
62+
63+
USE_L10N = True
64+
65+
USE_TZ = True
66+
67+
################
68+
# STATIC #
69+
################
70+
71+
STATIC_URL = config('STATIC_URL')

kernel/settings/development.py

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
from .base import *
2+
from .secure import *
3+
from .packages import *
4+
from decouple import config
5+
6+
# NOTE: base.py and packages.py branch on config('DEBUG') read from the
# environment, not on this constant -- keep the env value in sync with it.
DEBUG = True
7+
ALLOWED_HOSTS = config('ALLOWED_HOSTS', cast=lambda v: [s.strip() for s in v.split(',')])

kernel/settings/packages.py

+45
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
from decouple import config
2+
3+
DEFAULT_APPS = [
4+
'django.contrib.admin',
5+
'django.contrib.auth',
6+
'django.contrib.contenttypes',
7+
'django.contrib.sessions',
8+
'django.contrib.messages',
9+
'django.contrib.staticfiles',
10+
]
11+
LOCAL_APPS=[
12+
'warehouse'
13+
]
14+
THIRD_PARTY_PACKAGES = [
15+
'rest_framework'
16+
]
17+
18+
INSTALLED_APPS = DEFAULT_APPS + LOCAL_APPS + THIRD_PARTY_PACKAGES
19+
20+
# ############## #
21+
# REST FRAMEWORK #
22+
# ############## #
23+
24+
REST_FRAMEWORK = {
25+
# 'DEFAULT_PERMISSION_CLASSES': [
26+
# 'rest_framework.permissions.DjangoModelPermissionsOrAnonReadOnly'
27+
# ],
28+
# 'DEFAULT_AUTHENTICATION_CLASSES': (
29+
# 'rest_framework_simplejwt.authentication.JWTAuthentication',
30+
# ),
31+
# Versioning
32+
# 'DEFAULT_VERSIONING_CLASS': 'rest_framework.versioning.AcceptHeaderVersioning',
33+
# 'ALLOWED_VERSIONS': ('1.0',),
34+
# Pagination
35+
'DEFAULT_PAGINATION_CLASS': 'rest_framework.pagination.LimitOffsetPagination',
36+
'PAGE_SIZE': 100
37+
}
38+
39+
if config('DEBUG', cast=bool):
40+
REST_FRAMEWORK['DEFAULT_RENDERER_CLASSES'] = [
41+
'rest_framework.renderers.JSONRenderer',
42+
'rest_framework.renderers.BrowsableAPIRenderer'
43+
]
44+
else:
45+
REST_FRAMEWORK['DEFAULT_RENDERER_CLASSES'] = ['rest_framework.renderers.JSONRenderer']

kernel/settings/production.py

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
from .base import *
2+
from .secure import *
3+
from .packages import *
4+
from decouple import config
5+
6+
DEBUG = config('DEBUG', cast=bool)
7+
ALLOWED_HOSTS = config('ALLOWED_HOSTS', cast=lambda v: [s.strip() for s in v.split(',')])

kernel/settings/secure.py

+58
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
from decouple import config
2+
3+
4+
SECRET_KEY = config('SECRET_KEY')
5+
PREPEND_WWW = config('PREPEND_WWW', cast=bool)
6+
7+
# ######################### #
8+
# DATABASE #
9+
# ######################### #
10+
11+
# DATABASES = {
12+
# 'default': {
13+
# 'ENGINE': 'django.db.backends.postgresql_psycopg2',
14+
# 'NAME': config('DB_NAME'),
15+
# 'USER': config('DB_USER'),
16+
# 'PASSWORD': config('DB_PASSWORD'),
17+
# 'HOST': config('DB_HOST'),
18+
# 'PORT': config('DB_PORT', cast=int),
19+
# 'TEST': {
20+
# 'NAME': config('DB_TEST'),
21+
# },
22+
# }
23+
# }
24+
DATABASES = {
25+
'default': {
26+
'ENGINE': 'django.db.backends.sqlite3',
27+
'NAME': 'db.sqlite3',
28+
}
29+
}
30+
31+
# ############################ #
32+
# SSL CONFIGURATION #
33+
# ############################ #
34+
SECURE_BROWSER_XSS_FILTER = config('SECURE_BROWSER_XSS_FILTER', cast=bool)
35+
SECURE_CONTENT_TYPE_NOSNIFF = config('SECURE_CONTENT_TYPE_NOSNIFF', cast=bool)
36+
SECURE_HSTS_INCLUDE_SUBDOMAINS = config('SECURE_HSTS_INCLUDE_SUBDOMAINS', cast=bool)
37+
SECURE_HSTS_PRELOAD = config('SECURE_HSTS_PRELOAD', cast=bool)
38+
SECURE_HSTS_SECONDS = config('SECURE_HSTS_SECONDS', cast=int)
39+
40+
if config('SECURE_PROXY_SSL_HEADER', cast=bool):
41+
SECURE_PROXY_SSL_HEADER = ('HTTP_X_FORWARDED_PROTO', 'https')
42+
43+
SECURE_REDIRECT_EXEMPT = []
44+
SECURE_REFERRER_POLICY = config('SECURE_REFERRER_POLICY')
45+
SECURE_SSL_HOST = config('SECURE_SSL_HOST')
46+
SECURE_SSL_REDIRECT = config('SECURE_SSL_REDIRECT', cast=bool)
47+
48+
# ############################ #
49+
# Security #
50+
# ############################ #
51+
CSRF_COOKIE_AGE = config('CSRF_COOKIE_AGE', cast=int)
52+
CSRF_COOKIE_HTTPONLY = config('CSRF_COOKIE_HTTPONLY', cast=bool)
53+
CSRF_COOKIE_NAME = config('CSRF_COOKIE_NAME')
54+
CSRF_COOKIE_PATH = config('CSRF_COOKIE_PATH')
55+
CSRF_COOKIE_SAMESITE = config('CSRF_COOKIE_SAMESITE').capitalize()
56+
CSRF_COOKIE_SECURE = config('CSRF_COOKIE_SECURE', cast=bool)
57+
CSRF_USE_SESSIONS = config('CSRF_USE_SESSIONS', cast=bool)
58+
CSRF_HEADER_NAME = config('CSRF_HEADER_NAME')

kernel/urls.py

+10
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
from django.contrib import admin
2+
from django.urls import(
3+
path,
4+
include
5+
)
6+
7+
urlpatterns = [
8+
path('admin/', admin.site.urls),
9+
path('api/', include('warehouse.api.urls'))
10+
]

kernel/wsgi.py

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
import os
2+
3+
from django.core.wsgi import get_wsgi_application
4+
5+
# Default settings module for WSGI; an already-set DJANGO_SETTINGS_MODULE wins.
# NOTE(review): kernel/settings/ is a package split into base/development/
# production modules -- confirm kernel/settings/__init__.py selects one of
# them, otherwise point this at e.g. 'kernel.settings.production'.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'kernel.settings')
6+
7+
application = get_wsgi_application()

0 commit comments

Comments
 (0)