forked from kaisugi/hyperdoc2vec
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathMedia.py
54 lines (47 loc) · 1.26 KB
/
Media.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
# Author: Pierce Brooks
import os
import sys
import json
from pprint import pprint
from bs4 import BeautifulSoup
def dictify(ul):
    """Convert a nested HTML list into a nested dictionary keyed by link target.

    Each direct ``<li>`` child of *ul* contributes at most one entry. The key
    is the ``href`` of the first ``<a>`` carrying an ``href`` attribute found
    anywhere inside that item, with every occurrence of
    ``/pmwiki/pmwiki.php/`` removed. The value is the result of applying this
    same conversion to the first ``<ul>`` found inside the item, or an empty
    dict when the item contains no nested list.

    Items without any link are skipped, together with whatever they contain.
    When two items produce the same key, the later one overwrites the earlier.

    Args:
        ul: A BeautifulSoup ``<ul>`` tag (any object exposing a compatible
            ``find_all`` whose items expose ``find`` works as well).

    Returns:
        A dict mapping each cleaned link target to the dict built from its
        nested list (``{}`` for leaves).
    """
    result = {}
    for li in ul.find_all("li", recursive=False):
        # Only the first link names the entry, so stop searching at the first
        # match instead of collecting every anchor in the item's subtree.
        anchor = li.find("a", href=True)
        if anchor is None:
            continue
        key = anchor["href"].replace("/pmwiki/pmwiki.php/", "")
        # Kept in its own name so the ``ul`` parameter is never rebound.
        nested = li.find("ul")
        result[key] = dictify(nested) if nested is not None else {}
    return result
def run(target):
    """Extract the media index from a saved HTML page and dump it as JSON.

    Reads the HTML file at *target*, converts each ``<ul>`` in the document
    with ``dictify`` and stops at the first one whose top-level keys include
    ``"Main/Animation"``. That dictionary is pretty-printed to stdout and
    written as JSON to ``target + ".json"``. If no list qualifies, nothing is
    printed or written.

    Args:
        target: Path of the HTML file to read; also the stem of the output
            path.

    Returns:
        Always ``0``, whether or not a matching list was found.
    """
    # Context managers guarantee the handles are released even if reading,
    # parsing or writing raises.
    with open(target, "r") as descriptor:
        content = descriptor.read()
    # Name the parser explicitly: without it BeautifulSoup picks whichever
    # parser happens to be installed (and warns), so the resulting tree could
    # differ from one machine to the next.
    soup = BeautifulSoup(content, "html.parser")
    for ul in soup.find_all("ul", recursive=True):
        d = dictify(ul)
        if "Main/Animation" not in d:
            continue
        pprint(d, width=1)
        with open(target + ".json", "w") as descriptor:
            descriptor.write(json.dumps(d))
        # Only the first matching list is exported.
        break
    return 0
def launch(arguments):
    """Run the extraction for the path given on the command line.

    Args:
        arguments: An argv-style sequence; element 1 is the file handed to
            ``run``.

    Returns:
        ``False`` when fewer than two arguments are supplied or when ``run``
        reports a non-zero status, ``True`` otherwise. The status returned by
        ``run`` is printed before the result is decided.
    """
    if len(arguments) < 2:
        return False
    status = run(arguments[1])
    print(str(status))
    return status == 0
# Script entry point: run against the command-line arguments and echo the
# boolean outcome.
if __name__ == "__main__":
    outcome = launch(sys.argv)
    print(str(outcome))