4
4
import time
5
5
import os .path
6
6
7
- driver = webdriver .Chrome ()
8
- with open ('config.json' , 'r' ) as f :
9
- config = json .load (f )
10
-
11
7
# Login to etherscan and auto fill login information if available
12
8
def login ():
13
9
driver .get ('https://etherscan.io/login' )
@@ -19,8 +15,9 @@ def login():
19
15
20
16
input ("Press enter once logged in" )
21
17
18
+
22
19
# Retrieves label information and saves it as JSON/CSV
23
- def getLabel (label ,type = 'single' ):
20
+ def getLabel (label , type = 'single' ):
24
21
baseUrl = 'https://etherscan.io/accounts/label/{}?subcatid=0&size=100&start={}'
25
22
index = 0 # Initialize start index at 0
26
23
table_list = []
@@ -31,7 +28,7 @@ def getLabel(label,type='single'):
31
28
try :
32
29
newTable = pd .read_html (driver .page_source )[0 ]
33
30
except ImportError :
34
- print (label ,"Skipping label due to error" )
31
+ print (label , "Skipping label due to error" )
35
32
return
36
33
table_list .append (newTable [:- 1 ]) # Remove last item which is just sum
37
34
index += 100
@@ -42,7 +39,7 @@ def getLabel(label,type='single'):
42
39
df .fillna ('' , inplace = True ) # Replace NaN as empty string
43
40
44
41
# Print the DataFrame length and save it as a CSV
45
- print (label ,'Df length:' , len (df .index ))
42
+ print (label , 'Df length:' , len (df .index ))
46
43
df .to_csv ('data/{}.csv' .format (label ))
47
44
48
45
# Save as json object with mapping address:nameTag
@@ -51,14 +48,15 @@ def getLabel(label,type='single'):
51
48
with open ('data/{}.json' .format (label ), 'w' , encoding = 'utf-8' ) as f :
52
49
json .dump (addressNameDict , f , ensure_ascii = True )
53
50
54
- if (type == 'single' ):
51
+ if (type == 'single' ):
55
52
endOrContinue = input (
56
53
'Type "exit" end to end or "label" of interest to continue' )
57
54
if (endOrContinue == 'exit' ):
58
55
driver .close ()
59
56
else :
60
57
getLabel (endOrContinue )
61
58
59
+
62
60
# Retrieves all labels from labelcloud and saves as JSON/CSV
63
61
def getAllLabels ():
64
62
driver .get ('https://etherscan.io/labelcloud' )
@@ -77,14 +75,23 @@ def getAllLabels():
77
75
78
76
for label in labels :
79
77
if (os .path .exists ('data/{}.csv' .format (label ))):
80
- print (label ,'already exists skipping.' )
78
+ print (label , 'already exists skipping.' )
81
79
continue
82
- getLabel (label ,'all' )
83
- time .sleep (5 ) # Give 5s interval to prevent RL
80
+ elif label in ignore_list :
81
+ print (label ,'ignored due to large size and irrelevance' )
82
+ continue
83
+ getLabel (label , 'all' )
84
+ time .sleep (5 ) # Give 5s interval to prevent RL
85
+
86
+
87
+ ignore_list = ['eth2-depositor' , 'gnosis-safe-multisig' ]
88
+ with open ('config.json' , 'r' ) as f :
89
+ config = json .load (f )
90
+ driver = webdriver .Chrome ()
84
91
85
92
login ()
86
93
retrievalType = input ('Enter retrieval type (single/all): ' )
87
- if (retrievalType == 'all' ):
94
+ if (retrievalType == 'all' ):
88
95
getAllLabels ()
89
96
else :
90
97
singleLabel = input ('Enter label of interest: ' )
0 commit comments