-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathget_pub_dates.py
executable file
·68 lines (51 loc) · 2.25 KB
/
get_pub_dates.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#!/usr/bin/env python3
import requests
import xmltodict
import json
from datetime import datetime
# Maps a release year (as a string) to a fixed timestamp string: 12:00:03 on
# the last day of February..October 2021, one month per year from 2013 to
# 2021, rendered with datetime.isoformat() plus a literal '.000Z' suffix.
# get_podcasts() looks these up by the year found in an episode's pubDate.
PUBLISH_DATES = {
    year: datetime(2021, month, day, 12, 0, 3).isoformat() + '.000Z'
    for year, month, day in (
        ('2013', 2, 28),
        ('2014', 3, 31),
        ('2015', 4, 30),
        ('2016', 5, 31),
        ('2017', 6, 30),
        ('2018', 7, 31),
        ('2019', 8, 31),
        ('2020', 9, 30),
        ('2021', 10, 31),
    )
}
def get_podcasts(rss):
    """Collect the title and date fields of every episode in a parsed feed.

    Args:
        rss: The ``rss`` mapping of a feed parsed with xmltodict. Episodes
            are read from ``rss['channel']['item']``.

    Returns:
        A list with one dict per episode, in feed order. Each dict holds
        ``'title'`` when the episode has one and, when the episode has a
        ``pubDate``, both ``'publish_at'`` (the PUBLISH_DATES entry for the
        pubDate's year) and ``'pub_date'`` (the reformatted pubDate). The
        list is empty when the channel has no items.

    Raises:
        KeyError: If ``rss`` has no ``'channel'`` key, or a pubDate's year
            is not a key of PUBLISH_DATES.
        IndexError: If a pubDate has fewer than four whitespace-separated
            fields.
        ValueError: If a pubDate does not match the expected layout.
    """
    channel = rss['channel'] or {}
    items = channel.get('item')
    if items is None:
        return []
    # xmltodict yields a single mapping (not a list) when an element occurs
    # only once, so a feed with exactly one episode needs wrapping.
    if not isinstance(items, list):
        items = [items]

    all_podcasts = []
    for podcast in items:
        this_podcast = {}
        if 'title' in podcast:
            this_podcast['title'] = podcast['title']
        if 'pubDate' in podcast:
            raw_date = podcast['pubDate']
            # Fourth whitespace-separated field is taken as the year.
            pub_year = raw_date.split()[3]
            # Assumes an RFC 822 style value such as
            # 'Wed, 02 Oct 2002 13:00:00 +0000': the slice drops the first 5
            # and last 6 characters before parsing.
            # NOTE(review): the trailing characters are discarded rather than
            # applied as an offset, yet a 'Z' suffix is appended -- confirm
            # that this is intended.
            pub_date = datetime.strptime(
                raw_date[5:-6], '%d %b %Y %H:%M:%S'
            ).isoformat() + '.000Z'
            this_podcast['publish_at'] = PUBLISH_DATES[pub_year]
            this_podcast['pub_date'] = pub_date
        all_podcasts.append(this_podcast)

    return all_podcasts
def main():
    """Add recording and publish dates from the RSS feed to the podcast list.

    Downloads the feed at https://linkmeup.ru/rss/podcasts, extracts episode
    data with get_podcasts(), and matches episodes by exact title against
    the entries loaded from ``all_podcasts_w_mp4.json``. Matching entries
    get ``recordingDate`` (the episode's ``pub_date``) and ``publishAt``
    (its ``publish_at``); entries without a dated match are left unchanged.
    The resulting list is written to ``all_podcasts_w_pd.json``.

    Raises:
        requests.RequestException: If the download fails, times out, or
            returns an HTTP error status.
    """
    print('Start RSS parsing...')

    # A timeout keeps the script from hanging forever on a stalled server,
    # and raise_for_status() stops an error page from being parsed as a feed.
    r = requests.get('https://linkmeup.ru/rss/podcasts', timeout=30)
    r.raise_for_status()
    # Hand over the raw bytes so the XML parser applies the encoding declared
    # in the document; r.text is decoded with the charset requests infers
    # from the HTTP headers, which may not match and would corrupt titles.
    rss = xmltodict.parse(r.content)['rss']
    all_podcasts = get_podcasts(rss)

    # Index dated episodes by title. When titles repeat, the later episode
    # wins, the same outcome as scanning the whole list for every entry.
    dates_by_title = {
        episode['title']: episode
        for episode in all_podcasts
        if isinstance(episode.get('title'), str)
        and 'pub_date' in episode
        and 'publish_at' in episode
    }

    with open('all_podcasts_w_mp4.json', encoding='utf-8') as f:
        podcasts = json.load(f)

    for podcast in podcasts:
        title = podcast.get('title')
        episode = dates_by_title.get(title) if isinstance(title, str) else None
        if episode is None:
            continue
        podcast['recordingDate'] = episode['pub_date']
        podcast['publishAt'] = episode['publish_at']

    with open('all_podcasts_w_pd.json', 'w', encoding='utf-8') as f:
        json.dump(podcasts, f)

    print('Pub dates are added and dict saved to all_podcasts_w_pd.json.')
# Run the update only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()