-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathzdf.go
126 lines (113 loc) · 2.59 KB
/
zdf.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
package main
import (
"code.google.com/p/go-html-transform/h5"
"code.google.com/p/go.net/html"
"github.com/gorilla/mux"
"net/url"
"path/filepath"
"regexp"
"strconv"
"strings"
"time"
)
// Package-level configuration for the ZDF Mediathek scraper.
var (
// zdfHost is the scheme and host that every Mediathek path is appended to.
zdfHost = "http://www.zdf.de"
// zdfMediathek is the path prefix under which the Mediathek pages live.
zdfMediathek = "/ZDFmediathek/"
// zdfRouter is a gorilla/mux router; in this file it is only used as a
// factory for the zdfDay route, whose URL method builds listing URLs.
zdfRouter = mux.NewRouter()
// zdfDay is the route template for the "sendung-verpasst" listing of one
// day. The {d} variable is filled in by the caller and the query
// flash=off is attached to the route.
zdfDay = zdfRouter.NewRoute().
Path(zdfMediathek+"hauptnavigation/sendung-verpasst/day{d}").
Queries("flash", "off")
)
// Zdf crawls the ZDF Mediathek "sendung-verpasst" listings for the day
// indexes 0 through 7 and sends one leaf File on ch for every matching
// stream link it finds.
//
// For each day it fetches the listing page, follows every <a href> found
// inside the <div> elements selected by treeContains(root, "div", "class",
// "row", ...), and scans each linked page for <a href> values that contain
// both "veryhigh" (high quality) and "hstreaming." (.mov format). Each hit
// is sent as a copy of f with the file name appended to Path and with either
// Url set to the parsed link or Err set to the parse error.
//
// A listing URL that cannot be built, or a listing page that cannot be
// fetched, is reported through f.SendErr and stops the crawl. A linked page
// that cannot be fetched is reported the same way but only skips that page.
func Zdf(f File, ch chan File) {
	for day := 0; day <= 7; day++ {
		// Build the listing URL; report failures instead of dereferencing a
		// nil *url.URL further down.
		dayPath, err := zdfDay.URL("d", strconv.Itoa(day))
		if err != nil {
			f.SendErr(ch, &err)
			return
		}
		parsed, err := url.Parse(zdfHost + dayPath.String())
		if err != nil {
			f.SendErr(ch, &err)
			return
		}
		dayURL := *parsed

		root, err := grabParse(dayURL)
		if err != nil {
			f.SendErr(ch, &err)
			return
		}

		// Pages already fetched for this day, so that a page linked several
		// times from the listing is only downloaded once.
		visited := make(map[url.URL]bool)

		treeContains(root, "div", "class", "row", func(row h5.Tree) {
			treeFind(row, "a", "href", func(_ h5.Tree, link html.Attribute) {
				// The href is treated as a path and resolved against the
				// listing URL.
				epURL := *dayURL.ResolveReference(&url.URL{Path: link.Val})
				if visited[epURL] {
					return
				}
				visited[epURL] = true

				epRoot, err := grabParse(epURL)
				if err != nil {
					f.SendErr(ch, &err)
					return
				}

				treeFind(epRoot, "a", "href", func(_ h5.Tree, a html.Attribute) {
					v := a.Val
					if !strings.Contains(v, "veryhigh") || // high quality
						!strings.Contains(v, "hstreaming.") { // .mov format
						return
					}
					nf := f
					if streamURL, err := url.Parse(v); err == nil {
						nf.Url = *streamURL
					} else {
						nf.Err = err
					}
					nf.Path += filepath.Base(v)
					ch <- nf.SetLeaf()
				})
			})
		})
	}
}
// grabParse downloads the document at u with grabHttp and parses the result
// with h5.NewFromString. On success it returns the parsed tree by value; if
// either step fails it returns the zero h5.Tree together with that error.
func grabParse(u url.URL) (t h5.Tree, err error) {
	content, err := grabHttp(u.String())
	if err != nil {
		return t, err
	}
	parsed, err := h5.NewFromString(content)
	if err != nil {
		return t, err
	}
	return *parsed, nil
}
// treeContains calls f once for every element named e in t that has an
// attribute key whose value contains val as a substring. f receives the tree
// that treeFind builds for the matching node.
func treeContains(t h5.Tree, e, key, val string, f func(h5.Tree)) {
	treeFind(t, e, key, func(match h5.Tree, a html.Attribute) {
		// Honour val: without this check every element that merely has the
		// attribute would be reported, whatever its value.
		if strings.Contains(a.Val, val) {
			f(match)
		}
	})
}
// treeFind walks t and, for every node whose Data equals e and that has an
// attribute named key, calls f with a tree built from that node by
// h5.NewTree and a copy of the attribute.
func treeFind(t h5.Tree, e, key string, f func(h5.Tree, html.Attribute)) {
	visit := func(node *html.Node) {
		if node.Data != e {
			return
		}
		attr := getAttr(node, key)
		if attr == nil {
			return
		}
		f(h5.NewTree(node), *attr)
	}
	t.Walk(visit)
}
// getAttr returns a pointer to a copy of the first attribute of n whose Key
// equals key, or nil when n has no such attribute. Because the result is a
// copy, changing it does not modify n.
func getAttr(n *html.Node, key string) *html.Attribute {
	for i := range n.Attr {
		if n.Attr[i].Key != key {
			continue
		}
		found := n.Attr[i]
		return &found
	}
	return nil
}
// zdfDateRe matches two two-digit path segments followed by a segment that
// starts with six digits, captured as three two-digit groups that parseDate
// reads as year, month and day.
var zdfDateRe = regexp.MustCompile(`/\d\d/\d\d/(\d\d)(\d\d)(\d\d)`)

// parseDate extracts a date from the first "/NN/NN/YYMMDD" sequence in url.
// The two-digit year is taken to be in the 2000s and the result is midnight
// UTC on that day. Out-of-range month or day values are normalised by
// time.Date. If url contains no such sequence, or a captured group cannot be
// converted to a number, the current time is returned instead.
func parseDate(url string) time.Time {
	m := zdfDateRe.FindStringSubmatch(url)
	if m == nil {
		return time.Now()
	}
	y, e1 := strconv.Atoi(m[1])
	mo, e2 := strconv.Atoi(m[2])
	d, e3 := strconv.Atoi(m[3])
	// Fall back only when a conversion failed. The original test was
	// inverted, so the parsed date was never returned.
	if e1 != nil || e2 != nil || e3 != nil {
		return time.Now()
	}
	return time.Date(y+2000, time.Month(mo), d, 0, 0, 0, 0, time.UTC)
}