-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathKITTIConverter.py
137 lines (115 loc) · 5.85 KB
/
KITTIConverter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
import os
import xml.etree.ElementTree as ET
from VOCConverter import ToVOCConverter
class KITTItoVOCConverter(ToVOCConverter):
    '''
    Convert KITTI label files into Pascal VOC style XML label files.

    In KITTI each image has its own label file with the same name and a
    different extension. Each label file has 1 line per label.
    Each column is:

    #Values    Name      Description
    ----------------------------------------------------------------------------
       1    type         Describes the type of object: 'Car', 'Van', 'Truck',
                         'Pedestrian', 'Person_sitting', 'Cyclist', 'Tram',
                         'Misc' or 'DontCare'
       1    truncated    Float from 0 (non-truncated) to 1 (truncated), where
                         truncated refers to the object leaving image boundaries
       1    occluded     Integer (0,1,2,3) indicating occlusion state:
                         0 = fully visible, 1 = partly occluded
                         2 = largely occluded, 3 = unknown
       1    alpha        Observation angle of object, ranging [-pi..pi]
       4    bbox         2D bounding box of object in the image (0-based index):
                         contains left, top, right, bottom pixel coordinates
       3    dimensions   3D object dimensions: height, width, length (in meters)
       3    location     3D object location x,y,z in camera coordinates (in meters)
       1    rotation_y   Rotation ry around Y-axis in camera coordinates [-pi..pi]
       1    score        Only for results: Float, indicating confidence in
                         detection, needed for p/r curves, higher is better.

    Only the first 15 columns are converted; the optional score column is
    ignored.
    '''

    # Number of columns every label line must provide (type .. rotation_y).
    _NUM_REQUIRED_FIELDS = 15

    def __init__(self, imageFolder, sourceLabelFolder, outputLabelFolder):
        '''
        imageFolder: folder where all the images are
        sourceLabelFolder: path to folder where source data is saved
        outputLabelFolder: path where output files are to be saved
        '''
        super().__init__(imageFolder, sourceLabelFolder, outputLabelFolder)
        # NOTE(review): these two attributes are not read inside this class
        # except for imageFormat below; presumably the base class uses them
        # when building the XML header - confirm against ToVOCConverter.
        self.imageFormat = ".png"
        self.database = "KITTI"

    @staticmethod
    def _stem(fileName):
        '''
        Return fileName without its final extension ("000001.txt" -> "000001").
        '''
        return os.path.splitext(fileName)[0]

    def createXMLLabelFile(self, labelFileName):
        '''
        take in a KITTI label file for an image (labelFileName) and save out
        the Pascal VOC format label file

        labelFileName: name of a label file inside self.sourceLabelFolder

        The XML file is written to self.outputLabelFolder under the same base
        name with an ".xml" extension. Blank lines in the label file are
        skipped. Raises IndexError (from createXMLLabel) for malformed lines.
        '''
        # splitext is used instead of slicing at rfind(".txt"): rfind returns
        # -1 when the suffix is missing, which silently chopped the last
        # character off the name.
        baseName = self._stem(labelFileName)

        with open(os.path.join(self.sourceLabelFolder, labelFileName), "r") as f:
            self.currentOutFile = os.path.join(self.outputLabelFolder, baseName + ".xml")
            self.currentImageFile = baseName + self.imageFormat
            # initializeXMLFile() is not defined in this class; it is expected
            # to come from the base class and to return the root XML element.
            labelXML = self.initializeXMLFile()
            for rawLine in f:
                # split() without arguments drops the trailing newline and
                # tolerates repeated whitespace; split(" ") left "\n" glued to
                # the last column.
                fields = rawLine.split()
                if not fields:
                    # empty / whitespace-only line: nothing to convert
                    continue
                labelXML.append(self.createXMLLabel(fields))

        ET.ElementTree(labelXML).write(self.currentOutFile)

    def createXMLLabel(self, line):
        '''
        Create a xml style label for the line that is passed in. Return the
        xml data

        line: list of the string columns of one KITTI label line, in file order
        returns: an "object" xml.etree.ElementTree.Element
        raises: IndexError if fewer than 15 columns are supplied
        '''
        required = self._NUM_REQUIRED_FIELDS
        if len(line) < required:
            raise IndexError(
                "KITTI label line needs at least {} fields, got {}: {!r}".format(
                    required, len(line), line))

        # Strip every column so that stray whitespace (e.g. a trailing newline
        # on the last column) never ends up in the XML text.
        (label, truncated, occluded, alpha,
         bboxLeft, bboxTop, bboxRight, bboxBottom,
         dimHeight, dimWidth, dimLength,
         locX, locY, locZ, roty) = [field.strip() for field in line[:required]]

        objectLabel = ET.Element("object")
        ET.SubElement(objectLabel, "name").text = label.lower()
        ET.SubElement(objectLabel, "truncated").text = truncated
        ET.SubElement(objectLabel, "occluded").text = occluded
        ET.SubElement(objectLabel, "alpha").text = alpha

        bndbox = ET.SubElement(objectLabel, "bndbox")
        ET.SubElement(bndbox, "xmin").text = bboxLeft
        ET.SubElement(bndbox, "ymin").text = bboxTop
        ET.SubElement(bndbox, "xmax").text = bboxRight
        ET.SubElement(bndbox, "ymax").text = bboxBottom

        dimensions = ET.SubElement(objectLabel, "dimensions")
        ET.SubElement(dimensions, "height").text = dimHeight
        ET.SubElement(dimensions, "width").text = dimWidth
        ET.SubElement(dimensions, "length").text = dimLength

        location = ET.SubElement(objectLabel, "location")
        ET.SubElement(location, "x").text = locX
        ET.SubElement(location, "y").text = locY
        ET.SubElement(location, "z").text = locZ

        ET.SubElement(objectLabel, "rotation_y").text = roty
        return objectLabel

    def convertDataset(self, verbose=False):
        '''
        Convert the entire dataset

        verbose: when True, print a progress line every 100 label files

        raises: AssertionError if two label files share a base name, or if the
                images and the label files do not match one-to-one
        '''
        # The checks below raise AssertionError explicitly rather than using
        # the assert statement, so they still run under "python -O".

        # find all the label files (sorted for a reproducible processing order)
        labelFiles = sorted(os.listdir(self.sourceLabelFolder))
        numLabels = len(labelFiles)
        labelFileNames = {self._stem(name) for name in labelFiles}
        if numLabels != len(labelFileNames):
            raise AssertionError("Repeated label files!")

        # find all image files
        imageFileNames = {self._stem(name) for name in os.listdir(self.imageFolder)}

        # verify there is a label for each image and vice versa
        imagesWithNoLabel = sorted(imageFileNames - labelFileNames)
        labelsWithNoImage = sorted(labelFileNames - imageFileNames)
        if imagesWithNoLabel:
            raise AssertionError(
                "Images with no label file found: {}".format(imagesWithNoLabel))
        if labelsWithNoImage:
            raise AssertionError(
                "Labels with no image found: {}".format(labelsWithNoImage))

        # call createXMLLabelFile() on each file
        for counter, label in enumerate(labelFiles, start=1):
            self.createXMLLabelFile(label)
            if verbose and counter % 100 == 0:
                print("On image {}/{} {:.1f}% complete".format(counter,numLabels,float(counter)/float(numLabels)*100.))
        print("Finished converting {} labels!".format(numLabels))