forked from UTDNebula/api-tools
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcourseParser.go
115 lines (99 loc) · 4.48 KB
/
courseParser.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
package parser
import (
"fmt"
"regexp"
"strconv"
"github.com/PuerkitoBio/goquery"
"github.com/UTDNebula/api-tools/utils"
"github.com/UTDNebula/nebula-api/api/schema"
"go.mongodb.org/mongo-driver/bson/primitive"
)
var (
// coursePrefixRegexp matches a subject prefix and course number (e.g., "CS 1337").
// The pattern body comes from utils.R_SUBJ_COURSE_CAP and is presumably
// substituted into the `^%s` template, anchoring it to the start of the input.
// NOTE(review): getPrefixAndNumber expects exactly two capture groups from this
// pattern — confirm against the definition of utils.R_SUBJ_COURSE_CAP.
coursePrefixRegexp = utils.Regexpf(`^%s`, utils.R_SUBJ_COURSE_CAP)
// contactRegexp matches a parenthesised pair of integers separated by a hyphen,
// followed by whitespace and one or more of the letters S, U, F, Y
// (e.g. "(12-34) SUS").
// Capture groups: 1 = first integer, 2 = second integer, 3 = the letter run.
// getCourse stores them as lecture contact hours, laboratory contact hours and
// offering frequency respectively.
contactRegexp = regexp.MustCompile(`\(([0-9]+)-([0-9]+)\)\s+([SUFY]+)`)
)
// parseCourse returns the course identified by internalCourseNumber and the
// catalog year derived from session, creating and registering it on a miss.
//
// When the course is already present in Courses, the stored pointer is
// returned unchanged. Otherwise the course is built with getCourse, a
// requisite parser for it is stored in ReqParsers, and the course is recorded
// in both Courses and CourseIDMap before being returned.
func parseCourse(internalCourseNumber string, session schema.AcademicSession, rowInfo map[string]*goquery.Selection, classInfo map[string]string) *schema.Course {
	// Lookup key: the internal course number followed by the catalog year.
	key := internalCourseNumber + getCatalogYear(session)

	// Reuse the existing course rather than building a duplicate.
	if cached, found := Courses[key]; found {
		return cached
	}

	created := getCourse(internalCourseNumber, session, rowInfo, classInfo)

	// The requisite parser receives the enrollment-requirements selection
	// together with whether that row was present at all.
	reqSelection, reqPresent := rowInfo["Enrollment Reqs:"]
	ReqParsers[created.Id] = getReqParser(created, reqPresent, reqSelection)

	Courses[key] = created
	CourseIDMap[created.Id] = key
	return created
}
// getCourse builds a new schema.Course from the scraped row and class
// information and returns a pointer to it.
//
// The subject prefix and course number come from getPrefixAndNumber, the
// title, description and school come from the "Course Title:", "Description:"
// and "College:" rows, and the remaining fields are copied from classInfo.
// The course is assigned a fresh object ID. It is not registered in any of the
// package-level maps here; that is left to the caller.
//
// NOTE(review): a row missing from rowInfo yields a nil *goquery.Selection —
// confirm callers always supply the three rows read below.
func getCourse(internalCourseNumber string, session schema.AcademicSession, rowInfo map[string]*goquery.Selection, classInfo map[string]string) *schema.Course {
	prefix, number := getPrefixAndNumber(classInfo)

	// rowText returns the whitespace-trimmed text of the row with the given label.
	rowText := func(label string) string {
		return utils.TrimWhitespace(rowInfo[label].Text())
	}

	result := &schema.Course{
		Id:                     primitive.NewObjectID(),
		Course_number:          number,
		Subject_prefix:         prefix,
		Title:                  rowText("Course Title:"),
		Description:            rowText("Description:"),
		School:                 rowText("College:"),
		Credit_hours:           classInfo["Semester Credit Hours:"],
		Class_level:            classInfo["Class Level:"],
		Activity_type:          classInfo["Activity Type:"],
		Grading:                classInfo["Grading:"],
		Internal_course_number: internalCourseNumber,
		Catalog_year:           getCatalogYear(session),
	}

	// The description may embed contact hours and offering frequency, e.g.
	// "(12-34) SUS". A successful match yields the full match plus three groups;
	// when there is no match the three fields keep their zero values.
	if m := contactRegexp.FindStringSubmatch(result.Description); len(m) == 4 {
		result.Lecture_contact_hours, result.Laboratory_contact_hours, result.Offering_frequency = m[1], m[2], m[3]
	}
	return result
}
// getCatalogYear derives the catalog year from an academic session's name.
//
// The name must start with a 2-digit year followed by a semester character:
// 'F' (fall), 'S' (spring) or 'U' (summer). Fall sessions belong to the
// catalog year matching their own year; spring and summer sessions belong to
// the previous one (e.g. 20F -> "20", 20S -> "19").
//
// It panics if the name is shorter than three bytes, if its first two bytes do
// not parse as an integer, or if the semester character is not F, S or U.
func getCatalogYear(session schema.AcademicSession) string {
	name := session.Name

	// Guard the fixed-offset reads below so a malformed name is reported with a
	// descriptive message rather than a bare index-out-of-range panic.
	if len(name) < 3 {
		panic(fmt.Errorf("session name %q is too short to contain a year and semester", name))
	}

	year, err := strconv.Atoi(name[:2])
	if err != nil {
		panic(fmt.Errorf("parsing session year from %q: %w", name, err))
	}

	switch semester := name[2]; semester {
	case 'F':
		return strconv.Itoa(year)
	case 'S', 'U':
		// Spring and summer fall under the catalog that began the previous fall.
		return strconv.Itoa(year - 1)
	default:
		panic(fmt.Errorf("encountered invalid session semester '%c'", semester))
	}
}
// getPrefixAndNumber extracts the subject prefix and course number from
// classInfo["Class Section:"] by matching it against coursePrefixRegexp.
//
// It returns the first and second capture groups of the match, in that order.
// It panics if classInfo has no "Class Section:" entry, or if the section id
// does not yield a full match with exactly two capture groups.
func getPrefixAndNumber(classInfo map[string]string) (string, string) {
	sectionID, ok := classInfo["Class Section:"]
	if !ok {
		panic("could not find 'Class Section:' in ClassInfo")
	}

	// A successful match is the full match followed by the two capture groups.
	matches := coursePrefixRegexp.FindStringSubmatch(sectionID)
	if len(matches) != 3 {
		panic(fmt.Sprintf("failed to parse course prefix and number from %q", sectionID))
	}
	return matches[1], matches[2]
}