-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtmp.py
74 lines (51 loc) · 1.98 KB
/
tmp.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
def duplicateSamples(sampleList, targetNum=700):
    """Resize ``sampleList`` to ``targetNum`` items by truncating or repeating.

    If the list already holds more than ``targetNum`` items, only the first
    ``targetNum`` are kept. Otherwise the whole list is repeated as many
    times as fits and topped up with a prefix of itself so that the result
    has ``targetNum`` items.

    Args:
        sampleList: Sequence of samples (e.g. image paths) of one class.
        targetNum: Desired number of samples; expected to be non-negative.

    Returns:
        A new sequence of the same type as ``sampleList``. An empty input
        yields an empty result, because there is nothing to duplicate.
    """
    sampleNum = len(sampleList)
    # Guard the division below: an empty class cannot be oversampled.
    if sampleNum == 0:
        return sampleList[:0]
    scale, remain = divmod(targetNum, sampleNum)
    # More samples than requested: keep only the first targetNum of them.
    if scale == 0:
        return sampleList[:targetNum]
    # `sampleList[:0]` is empty, so this also covers the remain == 0 case.
    return sampleList * scale + sampleList[:remain]
def generateList(filename):
    """Read a ``<path> <label>`` list file and group the paths by label.

    Each non-blank line must start with a path followed by an integer
    label, separated by whitespace. Any further fields on the line are
    ignored. Blank lines are skipped.

    Args:
        filename: Path of the text file to read.

    Returns:
        A dict mapping each integer label to the list of paths carrying
        that label, in file order.

    Raises:
        IndexError: If a non-blank line has fewer than two fields.
        ValueError: If the second field of a line is not an integer.
    """
    # Dictionary collecting all sample paths, keyed by class label.
    cls_dict = {}
    with open(filename, 'r') as f:
        for line in f:
            # split() with no argument tolerates repeated spaces, tabs and
            # the trailing newline; it returns [] for a blank line.
            fields = line.split()
            if not fields:
                continue
            path, label = fields[0], int(fields[1])
            cls_dict.setdefault(label, []).append(path)
    return cls_dict  # all the data, grouped by class
def generateData(cls_dict, step=None, dpFlag=False, targetNum=700):
    """Build shuffled, parallel path and label lists from per-class samples.

    For every class the sample list is shuffled, optionally downsampled by
    keeping every ``step``-th item, and optionally resized to ``targetNum``
    items with ``duplicateSamples``. All classes are then concatenated and
    shuffled together, keeping each path paired with its label.

    The global ``random`` generator is seeded with a fixed value on every
    call, so the result is reproducible for a given input.

    NOTE: ``cls_dict`` is modified in place. Each per-class list is
    shuffled and then replaced by its downsampled/duplicated version, so
    calling this function twice on the same dict compounds the resampling.

    Args:
        cls_dict: Dict mapping a class label to a list of sample paths.
        step: If not None, keep only every ``step``-th shuffled sample.
        dpFlag: If true, resize every class to ``targetNum`` samples.
        targetNum: Per-class sample count used when ``dpFlag`` is set.

    Returns:
        A ``(pathset, labelset)`` tuple of two equally long lists, where
        ``labelset[i]`` is the label of ``pathset[i]``.
    """
    # Imported locally because the module has no top-level `import random`.
    import random

    pathset = []
    labelset = []
    random.seed(2222)
    for key in cls_dict:
        # Shuffle first so that the strided pick below is a random subset.
        random.shuffle(cls_dict[key])
        if step is not None:
            cls_dict[key] = cls_dict[key][::step]
        if dpFlag:
            cls_dict[key] = duplicateSamples(sampleList=cls_dict[key], targetNum=targetNum)
        sample_num = len(cls_dict[key])
        # Append this class's resampled paths with one label per path.
        pathset += cls_dict[key]
        labelset += sample_num * [key]
    # zip() is a one-shot iterator on Python 3; random.shuffle needs a
    # mutable sequence, so materialise the pairs first.
    pathAndLabel = list(zip(pathset, labelset))
    random.shuffle(pathAndLabel)
    pathset = [path for path, _ in pathAndLabel]
    labelset = [label for _, label in pathAndLabel]
    return pathset, labelset