1
+ """
2
+ General random-access file reader with very small memory overhead
3
+ Inspired by: http://stackoverflow.com/a/35785248/1857802
4
+
5
+ @author: Yaakov Gesher
6
+ """
7
+
8
+ # =============
9
+ # imports
10
+ # =============
11
+
12
+
13
+ # ==========
14
+ # classes
15
+ # ==========
16
+
17
+
18
class RandomAccessReader(object):
    """
    Read individual lines of a text file by line number without keeping the
    file contents in memory.

    The file is scanned once at construction time to record where each line
    starts and how long it is; ``get_line`` then seeks straight to the
    requested line and reads only that line.

    NOTE(review): offsets are counted in characters as returned by text-mode
    reads and are later handed to ``seek``. That is only dependable when one
    character occupies one byte on disk and no newline translation happens
    (e.g. ASCII text with plain newline endings) -- confirm before relying on
    this for other inputs.
    """

    # Number of characters requested per read while building the line index.
    _CHUNK_SIZE = 65536

    def __init__(self, filepath, endline_character='\n'):
        """
        :param filepath: Absolute path to file
        :param endline_character: Delimiter for lines. Defaults to the newline character
        :raises ValueError: if endline_character is empty
        """
        # An empty delimiter can never terminate a line, so the index would be
        # useless; fail loudly instead of silently building an empty index.
        if not endline_character:
            raise ValueError("endline_character must not be empty")
        self._filepath = filepath
        self._endline = endline_character
        self._lines = self._get_line_data()

    def _get_line_data(self):
        """
        Scan the file once and index every line.

        :return: list with one dict per line, holding the offset of the line's
                 first character ("position") and the line's length without
                 its delimiter ("length")
        """
        delimiter = self._endline
        delimiter_length = len(delimiter)
        lines = []
        offset = 0    # offset of the first character held in `pending`
        pending = ''  # text already read that belongs to the unfinished line
        with open(self._filepath) as f:
            for chunk in iter(lambda: f.read(self._CHUNK_SIZE), ''):
                # Prepending the unfinished tail lets a delimiter that straddles
                # two chunks still be recognised.
                pieces = (pending + chunk).split(delimiter)
                # The last piece has not met its delimiter yet.
                pending = pieces.pop()
                for piece in pieces:
                    lines.append({"position": offset, "length": len(piece)})
                    offset += len(piece) + delimiter_length
        # A final line that is not followed by a delimiter is still a line.
        if pending:
            lines.append({"position": offset, "length": len(pending)})
        return lines

    def get_line(self, line_number):
        """
        get the contents of a given line in the file
        :param line_number: 0-indexed line number
        :return: str
        :raises IndexError: if the index holds no entry for line_number
        """
        # Look the line up first so an invalid number fails before the file
        # is opened.
        line_data = self._lines[line_number]
        with open(self._filepath) as f:
            f.seek(line_data['position'])
            return f.read(line_data['length'])
62
+
63
+
64
+ class CsvRandomAccessReader (RandomAccessReader ):
65
+
66
+ def __init__ (self , filepath , has_header = True , endline_character = '\n ' , values_delimiter = ',' ):
67
+ super (CsvRandomAccessReader , self ).__init__ (filepath , endline_character )
68
+ self .headers = None
69
+ self ._delimiter = values_delimiter
70
+ if has_header :
71
+
0 commit comments