Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add course remaining page crawler #6

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 104 additions & 0 deletions DB.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,52 @@ def __init__(self, location: str) -> None:
cur.execute("CREATE TABLE IF NOT EXISTS TEACHER ( id TEXT, name TEXT, PRIMARY KEY ( id, name ) )")
cur.execute("CREATE TABLE IF NOT EXISTS RATE ( courseId TEXT NOT NULL, rowId TEXT NOT NULL, teacherId TEXT, content TEXT, contentEn TEXT, PRIMARY KEY (courseId, rowId) )")
cur.execute("CREATE TABLE IF NOT EXISTS RESULT ( courseId TEXT, yearsem TEXT, name TEXT, teacher TEXT, time TEXT, studentLimit INTEGER, studentCount INTEGER, lastEnroll INTEGER, PRIMARY KEY (courseId))")
cur.execute(
"""CREATE TABLE IF NOT EXISTS REMAIN (
id TEXT NOT NULL,
signableAdding BOOLEAN,
waitingList INTEGER,
originLimit INTEGER,
originRegistered INTEGER,
originAvailable INTEGER,
allLimit INTEGER,
allRegistered INTEGER,
allAvailable INTEGER,
otherDeptLimit INTEGER,
otherDeptRegistered INTEGER,
otherDeptAvailable INTEGER,
sameGradeLimit INTEGER,
sameGradeRegistered INTEGER,
sameGradeAvailable INTEGER,
diffGradeLimit INTEGER,
diffGradeRegistered INTEGER,
diffGradeAvailable INTEGER,
minorLimit INTEGER,
minorRegistered INTEGER,
minorAvailable INTEGER,
doubleMajorLimit INTEGER,
doubleMajorRegistered INTEGER,
doubleMajorAvailable INTEGER,
otherDeptInCollegeLimit INTEGER,
otherDeptInCollegeRegistered INTEGER,
otherDeptInCollegeAvailable INTEGER,
otherCollegeLimit INTEGER,
otherCollegeRegistered INTEGER,
otherCollegeAvailable INTEGER,
programLimit INTEGER,
programRegistered INTEGER,
programAvailable INTEGER,
sameGradeAndAboveLimit INTEGER,
sameGradeAndAboveRegistered INTEGER,
sameGradeAndAboveAvailable INTEGER,
lowerGradeLimit INTEGER,
lowerGradeRegistered INTEGER,
lowerGradeAvailable INTEGER,
otherProgramLimit INTEGER,
otherProgramRegistered INTEGER,
otherProgramAvailable INTEGER,
PRIMARY KEY ( id ));
""")

def addRate(self, rowId: str, courseId: str, teacherId: str, content: str, contentEn: str):
cur = self.con.cursor()
Expand Down Expand Up @@ -173,6 +219,64 @@ def isRateExist(self, courseId: str):
request = cur.execute('SELECT COUNT( DISTINCT courseId) FROM RATE WHERE courseId = ?', [courseId])
response = request.fetchone()
return response[0] > 0

def getThisSemesterCourseWithRemainUrl(self, y: str, s: str):
    """Return the id and remaining-seats URL of every course in a semester.

    Args:
        y: Value matched against the ``y`` column of ``COURSE``.
        s: Value matched against the ``s`` column of ``COURSE``.

    Returns:
        A list with one dict per matching row, keyed by the selected
        column names (``id`` and ``subRemainUrl``). The list is empty
        when no row matches.
    """
    cur = self.con.cursor()
    request = cur.execute(
        "SELECT id, subRemainUrl FROM COURSE WHERE y = ? AND s = ?", [y, s]
    )
    # The column names are identical for every row, so read them once
    # instead of rebuilding the list inside the loop.
    columns = [desc[0] for desc in request.description]
    return [dict(zip(columns, row)) for row in request.fetchall()]

def addRemain(self, courseData: dict):
    """Insert or replace one course's remaining-seat record in ``REMAIN``.

    Args:
        courseData: Mapping that holds ``id``, ``signableAdding``,
            ``waitingList`` and, for every quota category listed below,
            the ``<category>Limit``, ``<category>Registered`` and
            ``<category>Available`` values.

    Raises:
        KeyError: If ``courseData`` lacks any of the expected keys.
    """
    categories = (
        "origin", "all", "otherDept", "sameGrade", "diffGrade", "minor",
        "doubleMajor", "otherDeptInCollege", "program", "sameGradeAndAbove",
        "lowerGrade", "otherProgram", "otherCollege",
    )
    columns = ["id", "signableAdding", "waitingList"]
    columns += [
        category + suffix
        for category in categories
        for suffix in ("Limit", "Registered", "Available")
    ]
    # A single list drives both the column names and the bound values, so
    # the two can no longer drift apart (the hand-written tuple previously
    # stored the otherDept* values in the otherDeptInCollege* columns).
    values = [courseData[column] for column in columns]
    placeholders = ", ".join("?" * len(columns))

    cur = self.con.cursor()
    # Only fixed identifiers are interpolated; every value is still bound
    # through a placeholder.
    cur.execute(
        f"INSERT OR REPLACE INTO REMAIN ({', '.join(columns)}) VALUES ({placeholders});",
        values,
    )
    self.con.commit()

def getRemaining(self, y: str, s: str, courseId: str):
    """Return the stored remaining-seat record of one course.

    The row is looked up by the concatenation ``y + s + courseId``.

    Args:
        y: First part of the ``REMAIN.id`` key.
        s: Second part of the ``REMAIN.id`` key.
        courseId: Last part of the ``REMAIN.id`` key.

    Returns:
        A dict mapping column name to value, containing only the columns
        whose value is not NULL. An empty dict when no row has that id.
    """
    cur = self.con.cursor()
    request = cur.execute("SELECT * FROM REMAIN WHERE id = ?", [y + s + courseId])
    row = request.fetchone()
    if row is None:
        # fetchone() yields None for an unknown id; zipping it would raise.
        return {}
    columns = [desc[0] for desc in request.description]
    return {
        column: value
        for column, value in zip(columns, row)
        if value is not None
    }

if __name__ == "__main__":
db = DB("test.db")
Expand Down
64 changes: 64 additions & 0 deletions fetchRemain.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import logging

import requests
from bs4 import BeautifulSoup

# Maps a column heading of the quota table on the remaining-seats page to the
# prefix used for the corresponding result keys. Several headings share a
# prefix on purpose.
PROPERTY_NAME = {
    "專業基礎(開放系所)人數": "origin",
    "其他系所": "otherDept",
    "總人數": "all",
    "本系本班Dept./Class": "origin",
    "本系非本班同年級Other Classes in Dept., Same Year": "sameGrade",
    "本系非本班不同年級Other Classes in Dept., Dif. Year": "diffGrade",
    "輔系Minor": "minor",
    "雙主修Double-Major": "doubleMajor",
    "全系Dept.": "origin",
    "本院非本系Other Depts. in the College": "otherDeptInCollege",
    "非本院Other Colleges": "otherCollege",
    "學分學程": "program",
    "全校All Colleges": "all",
    "本學程開課年級(含)以上Same Year (and above) in the Program": "sameGradeAndAbove",
    "本學程其他低年級Year Below you in the Program": "lowerGrade",
    "本院非學程限制人數Maximum Limits for Other Programs in the College": "otherProgram",
    "外院限制人數Maximum Limits for Other Colleges": "otherCollege",
    "總限制人數Overall Maximum Limits": "all",
}
# Maps the label in the first cell of each quota-table row to the suffix used
# for the corresponding result keys.
ROW_NAME = {
    "限制人數 / Maximum limit": "Limit",
    "選課人數 / Number Registered": "Registered",
    "餘額 / Number of Available Spaces": "Available"
}


def _to_int_or_text(text: str):
    """Return *text* as an int when it is all digits, else the stripped text."""
    text = text.strip()
    return int(text) if text.isdigit() else text


def fetchRemain(fetch_url: str):
    """Scrape the remaining-seat figures of one course page.

    Any failure (network, HTTP status, unexpected page layout) is logged and
    whatever was parsed up to that point is returned, so this function never
    raises for those cases.

    Args:
        fetch_url: URL of the course's remaining-seats page. An ``https://``
            prefix is replaced by ``http://`` before the request is made.

    Returns:
        A dict with one ``<prefix><suffix>`` key for every combination of
        ``PROPERTY_NAME`` value and ``ROW_NAME`` value (``None`` when the
        page did not supply it), plus ``signableAdding`` and ``waitingList``
        once those have been parsed.
    """
    result = {prop + row: None for prop in PROPERTY_NAME.values() for row in ROW_NAME.values()}

    try:
        # A timeout keeps one unresponsive page from stalling the whole crawl.
        response = requests.get(fetch_url.replace("https://", "http://"), timeout=30)
        response.raise_for_status()

        soup = BeautifulSoup(response.content, "html.parser")

        profile_rows = soup.find("div", {"class": "maintain_profile_content_table"}).find_all("tr")
        result["signableAdding"] = profile_rows[5].find_all("td")[1].text == "是"
        result["waitingList"] = _to_int_or_text(profile_rows[6].find_all("td")[1].find("a").text)

        quota_rows = soup.find("table", {"id": "tclmtcntGV"}).find_all("tr")
        # Non-empty children of the header row are the category headings.
        headings = [
            cell.text.strip()
            for cell in quota_rows[0]
            if cell.get_text(strip=True) != ""
        ]
        for row in quota_rows[1:]:
            cells = row.find_all("td")
            if not cells:
                continue
            suffix = ROW_NAME.get(cells[0].text.strip())
            if suffix is None:
                logging.warning("fetchRemain: unknown row label %r", cells[0].text)
                continue
            # NOTE(review): assumes the first cell is the row label and the
            # following cells line up, in order, with the non-empty headings
            # -- confirm against the live page. Previously every cell of the
            # row was written to the same key, so each category ended up with
            # the value of the row's last cell.
            for heading, cell in zip(headings, cells[1:]):
                prefix = PROPERTY_NAME.get(heading)
                if prefix is None:
                    logging.warning("fetchRemain: unknown column heading %r", heading)
                    continue
                result[prefix + suffix] = _to_int_or_text(cell.text)
    except Exception as e:
        # Best-effort scrape: report the problem and return what we have.
        logging.error("fetchRemain failed for %s: %s", fetch_url, e)

    return result


if __name__ == "__main__":
    print(fetchRemain(
        "https://selectcourse.nccu.edu.tw/remain/goGenDetail.aspx?view=7735414C415774495851503054646C41713573494F513D3D"))
20 changes: 20 additions & 0 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from constant import YEAR_SEM, YEAR, SEM, COURSERESULT_CSV, COURSERESULT_YEARSEM
from fetchDescription import fetchDescription
from fetchRate import fetchRate
from fetchRemain import fetchRemain
# from translateRate import translateRate

allSemesters = [
Expand Down Expand Up @@ -45,6 +46,7 @@
parser.add_argument("--teacher", action="store_true", help="Fetch teacher")
parser.add_argument("--rate", action="store_true", help="Fetch rate")
parser.add_argument("--result", action="store_true", help="Fetch result")
parser.add_argument("--remaining", action="store_true", help="Fetch remain")
parser.add_argument("--db", help="Database name", default="test.db")
args = parser.parse_args()

Expand Down Expand Up @@ -353,3 +355,21 @@
logging.error(err)
continue
i += 1
# ==============================
# \ 5. Course Remaining \
# ==============================
# Runs only when the --remaining flag was given on the command line.
if args.remaining:

# Despite the name this is a list: one {"id", "subRemainUrl"} dict per
# COURSE row of the configured YEAR/SEM.
course_dict = db.getThisSemesterCourseWithRemainUrl(y=YEAR, s=SEM)

tqdmCourseRemains = tqdm.tqdm(course_dict, total=len(course_dict), leave=False)
for course in tqdmCourseRemains:
try:
# fetchRemain logs its own failures and returns whatever it managed to parse.
res = fetchRemain(course["subRemainUrl"])
# The scraped dict carries no course id, so attach it before storing.
res["id"] = course["id"]
db.addRemain(res)
except Exception as e:
# One failing course must not abort the crawl: log it and move on.
logging.error(e)
continue
else:
print("skipping Fetch Class Remaining")