Skip to content

Commit 75c0d61

Browse files
committed
restored code
1 parent b905ad1 commit 75c0d61

File tree

1 file changed

+71
-0
lines changed

1 file changed

+71
-0
lines changed

randomAccessReader/__init__.py

Lines changed: 71 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,71 @@
1+
"""
2+
General random-access file reader with very small memory overhead
3+
Inspired by: http://stackoverflow.com/a/35785248/1857802
4+
5+
@author: Yaakov Gesher
6+
"""
7+
8+
# =============
9+
# imports
10+
# =============
11+
12+
13+
# ==========
14+
# classes
15+
# ==========
16+
17+
18+
class RandomAccessReader(object):
    """
    Read single lines of a text file by line number.

    The file is scanned once, at construction time, to record where each line
    starts and how long it is. Only that index is kept in memory; every lookup
    reopens the file, seeks to the recorded start and reads one line.
    """

    def __init__(self, filepath, endline_character='\n'):
        """
        :param filepath: Path to the file to index
        :param endline_character: Delimiter for lines. Defaults to newline character (\\n)
        """
        self._filepath = filepath
        self._endline = endline_character
        self._lines = self._get_line_data()

    def _get_line_data(self):
        """
        Scan the file once and build the line index.

        :return: list with one dict per line, in file order. ``position`` is the
                 stream position of the line's first character as reported by
                 ``tell()``; ``length`` is the number of characters in the line,
                 delimiter excluded.
        """
        lines = []
        # ``with`` releases the handle even if reading fails part-way through.
        with open(self._filepath) as f:
            # Line starts are taken from tell() instead of being accumulated by
            # counting characters: in text mode one character is not always one
            # byte (multi-byte encodings, "\r\n" translated to "\n"), so a running
            # character count is not a position that seek() can be handed back.
            start_position = f.tell()
            current_line = 0
            # One character per read so that tell() can be sampled exactly at
            # each line boundary; read() returns '' at end of file.
            for current in iter(lambda: f.read(1), ''):
                if current == self._endline:
                    # we've reached the end of the current line
                    lines.append({"position": start_position, "length": current_line})
                    start_position = f.tell()
                    current_line = 0
                else:
                    current_line += 1
            if current_line:
                # The file does not end with the delimiter: the characters after
                # the last delimiter still form a line and must be reachable.
                lines.append({"position": start_position, "length": current_line})
        return lines

    def get_line(self, line_number):
        """
        get the contents of a given line in the file

        :param line_number: 0-indexed line number
        :return: str, the line without its delimiter
        :raises IndexError: if the file has no line with that number
        """
        # Look the line up first so an invalid number fails before the file is opened.
        line_data = self._lines[line_number]
        with open(self._filepath) as f:
            f.seek(line_data['position'])
            return f.read(line_data['length'])
62+
63+
64+
class CsvRandomAccessReader(RandomAccessReader):
65+
66+
def __init__(self, filepath, has_header=True, endline_character='\n', values_delimiter=','):
67+
super(CsvRandomAccessReader, self).__init__(filepath, endline_character)
68+
self.headers = None
69+
self._delimiter = values_delimiter
70+
if has_header:
71+

0 commit comments

Comments
 (0)