-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathNews.py
51 lines (42 loc) · 1.2 KB
/
News.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# -*- coding: utf-8 -*-
import re
import time
import urllib
import urllib2
def news(date="2014/10-24", html=None):
    """Collect the article links published on ``date`` from news.hit.edu.cn.

    The page is scanned for anchors of the form
    ``"/articles/<year>/<mm>-<dd>/<id>.htm" target="_blank">title</a>``.
    Links whose date part equals ``date`` are kept, duplicates (same URL)
    are dropped, and each kept URL and title is printed as it is found.

    Args:
        date: Date to keep, in the ``YYYY/MM-DD`` form used inside article
            URLs. Defaults to the previously hard-coded "2014/10-24"; pass
            ``time.strftime('%Y/%m-%d')`` to select today's articles.
        html: Page source to parse. When ``None`` the front page is
            downloaded from http://news.hit.edu.cn.

    Returns:
        A list of ``"<absolute url>$<title>"`` strings, in page order.
    """
    if html is None:
        # Python 2 urllib API, matching the rest of this file.
        page = urllib.urlopen("http://news.hit.edu.cn")
        try:
            html = page.read()
        finally:
            # Release the connection even if read() fails.
            page.close()

    # Capture groups replace the old fixed-offset slicing and literal split,
    # which failed whenever the whitespace before `target` was not exactly
    # one space even though the pattern itself accepts any amount.
    link_re = re.compile(r'''
        "(/articles/+            # opening quote, then the /articles/ prefix
          (\w*/\w\w-\w*)         # group 2: date part, e.g. 2014/10-24
          /\w*\.htm              # article id followed by a literal dot + htm
        )"                       # group 1 (the path) ends before the quote
        \s*target="_blank">      # any whitespace before the target attribute
        (\S*\s*\S*\s*\S*)        # group 3: link text, at most three tokens
        </a>
    ''', re.M | re.X)

    seen = set()  # URLs already emitted; O(1) duplicate check
    results = []
    for path, found_date, title in link_re.findall(html):
        if found_date != date:
            continue
        url = "http://news.hit.edu.cn" + path
        if url in seen:
            continue
        seen.add(url)
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(url)
        print(title)
        results.append(url + "$" + title)
    return results
# Script entry: run the scraper once. This is a bare module-level call, so it
# also fires when the file is imported; the returned list is discarded here.
news()