-
Notifications
You must be signed in to change notification settings - Fork 1
/
course.go
247 lines (222 loc) · 7.53 KB
/
course.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
package main
import (
"fmt"
"io/ioutil"
"log"
"net/http"
"regexp"
"strconv"
"strings"
"time"
"github.com/kennygrant/sanitize"
)
var (
	// httpSemaphore is a buffered channel with capacity MaxHTTPRequests.
	// getDescription sends into it before issuing a request and receives
	// from it afterwards, which caps the number of requests in flight.
	httpSemaphore = make(chan int, MaxHTTPRequests) // used to limit the # of HTTP requests at a time
	// re splits a 12-character course key into four capture groups of
	// 4, 4, 1 and 3 word characters.
	re = regexp.MustCompile(`(\w{4})(\w{4})(\w)(\w{3})`) // EXAMPLE: COMS4995W001 => [COMS, 4995, W, 001]
	// tags lazily matches anything between '<' and '>' (the s flag lets '.'
	// span newlines); parsePage uses it to strip markup.
	tags = regexp.MustCompile(`(?s:<.+?>)`) // meant to match all HTML tags
	// TODO: repent for this hideousness
	// desc matches the span from the "Course Description" cell up to and
	// including the "Web Site" row header. It has no capturing groups, so a
	// successful FindStringSubmatch yields exactly one element.
	// NOTE(review): `[.\n]*` is a character class of literal dots and
	// newlines, not "any character" - confirm that is what was intended.
	desc = regexp.MustCompile(`[.\n]*Course Description</td>\n <td bgcolor=#DADADA>(?s:.*)<tr valign=top><td bgcolor=#99CCFF>Web Site</td>[.\n]*`)
	// used to parse the Meets1 parameter into useful pieces
	// Each window is a pair of byte offsets into the Meets string; a negative
	// upper bound is treated by window.parse as "through the end of the string".
	meetsOn = window{0, 7}
	startTime = window{7, 13}
	endTime = window{14, 20}
	building = window{24, 35}
	room = window{35, -1}
)
// window is a helper for fill(): it selects the byte range [lower, upper) of
// a fixed-width string. A negative bound means "unbounded" on that side.
type window struct {
	lower, upper int
}

// parse returns the portion of s selected by the window with every space
// removed.
//
// Bounds are clamped to the string instead of being trusted: a negative lower
// bound starts at the beginning, a negative or too-large upper bound runs to
// the end, and a window that is empty, inverted, or entirely past the end of
// s yields "". Results are unchanged for every input the previous
// implementation handled; combinations that used to panic (for example a
// negative lower bound together with an upper bound past the end) now return
// the clamped slice.
func (w window) parse(s string) string {
	lo, hi := w.lower, w.upper
	if lo < 0 {
		lo = 0
	}
	if hi < 0 || hi > len(s) {
		hi = len(s)
	}
	// Covers lower past the end of s, empty windows and inverted windows.
	if lo >= hi {
		return ""
	}
	return strings.Replace(s[lo:hi], " ", "", -1)
}
// zeroInt is a helper for fill() that rewrites a numeric string in canonical
// base-10 form (for example "007" becomes "7"). The conversion error is
// deliberately discarded: input that is not a number comes back as "0".
func zeroInt(s string) string {
	value, _ := strconv.Atoi(s)
	return strconv.Itoa(value)
}
// parseDate is a helper for fill() that converts a meeting time such as
// "10:10A" or "02:40P" into 24-hour "HH:MM:SS" form. It returns "00:00:00"
// when t cannot be parsed.
//
// The previous layout "15:04P" treated the trailing 'P' as a literal
// character rather than a meridiem marker, so every "...A" time failed to
// parse and "...P" times were not shifted into the afternoon. The input is
// now read as a 12-hour clock with the single-letter suffix expanded to
// AM/PM; a plain 24-hour "HH:MM" value is accepted as a fallback.
func parseDate(t string) string {
	const unknown = "00:00:00"

	t = strings.ToUpper(strings.TrimSpace(t))
	if t == "" {
		return unknown
	}
	// Go's reference layout only understands "AM"/"PM", so complete a bare
	// "A" or "P" suffix.
	switch t[len(t)-1] {
	case 'A', 'P':
		t += "M"
	}
	for _, layout := range []string{"3:04PM", "15:04"} {
		if tm, err := time.Parse(layout, t); err == nil {
			return tm.Format("15:04:05")
		}
	}
	return unknown
}
// fill standardizes the information held in a Course.
//
// For each of Meets1 and Meets2: an empty value sets the matching start and
// end times to "00:00:00" and leaves the other derived fields alone; a
// non-empty value is cut into days, start time, end time, building and room
// using the package-level windows. The numeric string fields are then
// normalized with zeroInt, and finally CourseFull/ShortCourse and BulletinURL
// are derived from Course.
func (c *Course) fill() {
	const midnight = "00:00:00"

	// first meeting slot
	if first := c.Meets1; first != "" {
		c.MeetsOn1 = meetsOn.parse(first)
		c.StartTime1 = parseDate(startTime.parse(first))
		c.EndTime1 = parseDate(endTime.parse(first))
		c.Building1 = building.parse(first)
		c.Room1 = room.parse(first)
	} else {
		c.StartTime1, c.EndTime1 = midnight, midnight
	}

	// second meeting slot
	if second := c.Meets2; second != "" {
		c.MeetsOn2 = meetsOn.parse(second)
		c.StartTime2 = parseDate(startTime.parse(second))
		c.EndTime2 = parseDate(endTime.parse(second))
		c.Building2 = building.parse(second)
		c.Room2 = room.parse(second)
	} else {
		c.StartTime2, c.EndTime2 = midnight, midnight
	}

	// normalize every numeric string in place
	numeric := []*string{
		&c.NumFixedUnits,
		&c.MinUnits,
		&c.MaxUnits,
		&c.CallNumber,
		&c.NumEnrolled,
		&c.MaxSize,
	}
	for _, field := range numeric {
		*field = zeroInt(*field)
	}

	c.setCourseFull()
	c.setBulletinURL()
}
// setCourseFull parses c.Course (with up to six spaces replaced by
// underscores) and derives two keys from it:
//
//	CourseFull  = department + symbol + number   (e.g. COMS + W + 4995)
//	ShortCourse = department + number
//
// When c.Course does not match the expected layout the failure is logged and
// both fields are left untouched. Previously execution continued past the log
// line and indexed the nil match, which panicked.
func (c *Course) setCourseFull() {
	res := re.FindStringSubmatch(strings.Replace(c.Course, " ", "_", 6))
	if len(res) != 5 {
		log.Printf("Failed to parse given 'Course', %s. found %#v", c.Course, res)
		return
	}

	// res[0] is the whole match; res[4] (the section) is not needed here.
	dept, deptNum, symbol := res[1], res[2], res[3]
	c.CourseFull = dept + symbol + deptNum
	c.ShortCourse = dept + deptNum
}
// setBulletinURL builds the bulletin page URL for this course section from
// the pieces of c.Course and c.Term.
//
// When c.Course does not match the expected layout the failure is logged and
// BulletinURL is left untouched. Previously the submatch result was indexed
// without a length check, which panicked on a non-matching Course.
func (c *Course) setBulletinURL() {
	courseRegex := re.FindStringSubmatch(strings.Replace(c.Course, " ", "_", 6))
	if len(courseRegex) != 5 {
		log.Printf("Failed to build bulletin URL from 'Course', %s. found %#v", c.Course, courseRegex)
		return
	}
	dept, deptNum, symbol, section := courseRegex[1], courseRegex[2], courseRegex[3], courseRegex[4]

	// GOAL: http://www.columbia.edu/cu/bulletin/uwb/subj/COMS/W4995-20143-001/
	c.BulletinURL = fmt.Sprintf("http://www.columbia.edu/cu/bulletin/uwb/subj/%s/%s-%s-%s/",
		dept,
		symbol+deptNum,
		c.Term,
		section,
	)
}
// parsePage pulls the course description out of a bulletin page. It returns
// "" when the description section cannot be located; otherwise it returns the
// matched section with markup stripped, the "Web Site" and
// "Course Description" labels removed (first occurrence of each), surrounding
// whitespace trimmed, and the result passed through sanitize.Accents.
func parsePage(page []byte) string {
	matches := desc.FindStringSubmatch(string(page))
	if len(matches) != 1 {
		return ""
	}

	// strip every tag from the matched section
	text := tags.ReplaceAllString(matches[0], "")

	// drop the two static labels that surround the description
	text = strings.Replace(text, "Web Site", "", 1)
	text = strings.Replace(text, "Course Description", "", 1)
	text = strings.TrimSpace(text)

	// hand the trimmed text to sanitize.Accents and return its result
	return sanitize.Accents(text)
}
// getDescription scrapes the bulletin page at c.BulletinURL and stores the
// course description in c.Description ("no description" when the page holds
// none).
//
// It returns an error, and clears c.BulletinURL, when the request fails or
// the server answers with a non-2xx status. A failure while reading the body
// is only logged; whatever was read is still parsed.
//
// Two defects of the earlier version are fixed here: the response body was
// deferred-closed before the error check, which dereferenced a nil response
// when the request failed, and BulletinURL was cleared before being logged,
// so the log lines always showed an empty URL.
func (c *Course) getDescription() error {
	// Hold a semaphore slot only for the duration of the request itself.
	httpSemaphore <- 1
	resp, err := http.Get(c.BulletinURL)
	<-httpSemaphore

	if err != nil {
		// No usable response on error, so there is no body to close.
		log.Printf("Error getting bulletin page, %s => %s", c.BulletinURL, err.Error())
		c.BulletinURL = ""
		return fmt.Errorf("HTTP error querying bulletin for course, %s, %s", c.Course, err.Error())
	}
	defer resp.Body.Close()

	if resp.StatusCode/100 != 2 {
		log.Printf("%d error getting bulletin page, %s", resp.StatusCode, c.BulletinURL)
		c.BulletinURL = ""
		return fmt.Errorf("Error querying bulletin for course, %s", c.Course)
	}

	// read in then sanitize description
	bodyBytes, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		// Best effort: keep going with whatever bytes were read.
		log.Printf("Error reading in page (%s) body => %s", c.BulletinURL, err.Error())
	}

	// parse the page for the description
	courseDesc := parsePage(bodyBytes)
	if courseDesc == "" { // set to 'no description' if there is not one
		courseDesc = "no description"
	}
	c.Description = courseDesc
	return nil
}
// Course holds all information about an instance of a course.
// It embeds Course2 (course-level data) and Section (section-level data), so
// their fields are promoted onto Course.
type Course struct {
	Course2
	Section
	// Course is the raw course key; setCourseFull and setBulletinURL parse it
	// with the package-level `re` pattern (e.g. COMS4995W001).
	Course string `json:",omitempty"`
	// ShortCourse is set by setCourseFull to department + number. The "-" tag
	// excludes it from JSON.
	ShortCourse string `json:"-"`
	ChargeMsg1 string `json:",omitempty"`
	ChargeAmt1 string `json:",omitempty"`
	ChargeMsg2 string `json:",omitempty"`
	ChargeAmt2 string `json:",omitempty"`
}
// Course2 holds all information about a Course offered (ignoring section details).
// Every field is a string and is omitted from JSON when empty.
type Course2 struct {
	// CourseFull is set by setCourseFull to department + symbol + number.
	CourseFull string `json:",omitempty"`
	PrefixName string `json:",omitempty"`
	DivisionCode string `json:",omitempty"`
	DivisionName string `json:",omitempty"`
	SchoolCode string `json:",omitempty"`
	SchoolName string `json:",omitempty"`
	DepartmentCode string `json:",omitempty"`
	DepartmentName string `json:",omitempty"`
	SubtermCode string `json:",omitempty"`
	SubtermName string `json:",omitempty"`
	EnrollmentStatus string `json:",omitempty"`
	// NumFixedUnits, MinUnits and MaxUnits are normalized by fill() through
	// zeroInt, so after fill() they hold a base-10 integer string.
	NumFixedUnits string `json:",omitempty"`
	MinUnits string `json:",omitempty"`
	MaxUnits string `json:",omitempty"`
	CourseTitle string `json:",omitempty"`
	CourseSubtitle string `json:",omitempty"`
	Approval string `json:",omitempty"`
	BulletinFlags string `json:",omitempty"`
	ClassNotes string `json:",omitempty"`
	PrefixLongname string `json:",omitempty"`
	// Description is filled in by getDescription ("no description" when the
	// bulletin page yields none).
	Description string `json:",omitempty"`
}
// Section holds all information about a course's individual section.
// Every field is a string and is omitted from JSON when empty.
type Section struct {
	// BulletinURL is built by setBulletinURL and reset to "" by
	// getDescription when the bulletin request fails.
	BulletinURL string `json:",omitempty"`
	SectionFull string `json:",omitempty"`
	// Term is inserted into the bulletin URL by setBulletinURL.
	Term string `json:",omitempty"`
	// The *1 fields below are derived from Meets1 by fill(); when Meets1 is
	// empty only StartTime1 and EndTime1 are set (to "00:00:00").
	MeetsOn1 string `json:",omitempty"`
	StartTime1 string `json:",omitempty"`
	EndTime1 string `json:",omitempty"`
	Building1 string `json:",omitempty"`
	Room1 string `json:",omitempty"`
	// The *2 fields are derived from Meets2 in the same way.
	MeetsOn2 string `json:",omitempty"`
	StartTime2 string `json:",omitempty"`
	EndTime2 string `json:",omitempty"`
	Building2 string `json:",omitempty"`
	Room2 string `json:",omitempty"`
	// CallNumber, NumEnrolled and MaxSize are normalized by fill() through
	// zeroInt.
	// NOTE(review): "int" is not a tag option documented by encoding/json
	// (the documented ones include "omitempty" and "string"); confirm what
	// was intended for these three fields.
	CallNumber string `json:",omitempty,int"`
	CampusCode string `json:",omitempty"`
	CampusName string `json:",omitempty"`
	NumEnrolled string `json:",omitempty,int"`
	MaxSize string `json:",omitempty,int"`
	TypeCode string `json:",omitempty"`
	TypeName string `json:",omitempty"`
	// Raw meeting strings. fill() only parses Meets1 and Meets2; Meets3-6 are
	// not read anywhere in the code visible here.
	Meets1 string `json:",omitempty"`
	Meets2 string `json:",omitempty"`
	Meets3 string `json:",omitempty"`
	Meets4 string `json:",omitempty"`
	Meets5 string `json:",omitempty"`
	Meets6 string `json:",omitempty"`
	Instructor1Name string `json:",omitempty"`
	Instructor2Name string `json:",omitempty"`
	Instructor3Name string `json:",omitempty"`
	Instructor4Name string `json:",omitempty"`
	ExamMeet string `json:",omitempty"`
	ExamDate string `json:",omitempty"`
}