-
Notifications
You must be signed in to change notification settings - Fork 77
Expand file tree
/
Copy pathunevensplit.py
More file actions
79 lines (68 loc) · 2.51 KB
/
unevensplit.py
File metadata and controls
79 lines (68 loc) · 2.51 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
'''
Created on Dec 22, 2016
@author: vaibhavsaini
'''
import sys
import math
class Spliter(object):
    """Split a text file into chunks of progressively increasing size.

    The first chunk receives ``base_x`` lines and every following chunk
    receives ``ceil(0.5 * base_x)`` more lines than the one before it.
    Chunks are written to ``query_<n>.file`` (numbered from 1) in the
    current working directory.
    """

    def __init__(self, params):
        """Read the settings, count the input lines and size the first chunk.

        Args:
            params: mapping with the keys ``'split_count'`` (number of
                chunks to aim for) and ``'input_filename'`` (path of the
                file to split).

        Raises:
            ValueError: if ``split_count`` is smaller than 1.
        """
        self.split_count = params['split_count']
        self.input_filename = params['input_filename']
        # Handle of the chunk currently being written; set by split().
        self.outfile = None
        self.total_lines = self.get_num_lines_in_input_file()
        self.find_base_x()

    def split(self):
        """Write the input file out as consecutive ``query_<n>.file`` chunks.

        On an I/O error the error is printed and the process exits with
        status 1. The chunk that is open at that moment is always closed,
        whether the method finishes normally or not.
        """
        count = 0
        line_limit = self.base_x
        print ("line_limit is ", line_limit)
        file_count = 1
        self.outfile = None
        try:
            print("creating split ", file_count)
            self.outfile = open("query_{part}.file".format(part=file_count), 'w')
            with open(self.input_filename, 'r') as inputfile:
                for row in inputfile:
                    if count >= line_limit:
                        # Current chunk is full: roll over to a new,
                        # larger chunk before writing this row.
                        self.outfile.close()
                        file_count += 1
                        count = 0
                        line_limit = line_limit + math.ceil(0.5*self.base_x)
                        print ("line_limit is ", line_limit)
                        print ("creating split ", file_count)
                        self.outfile = open("query_{part}.file".format(part=file_count), 'w')
                    self.outfile.write(row)
                    count += 1
        except IOError as e:
            print ("Error: {error}".format(error=e))
            sys.exit(1)
        finally:
            # Runs on success, on IOError and on the SystemExit raised
            # above, so the output handle is never leaked.
            if self.outfile is not None and not self.outfile.closed:
                self.outfile.close()

    def get_num_lines_in_input_file(self):
        """Return the number of lines in the input file (0 if it is empty)."""
        with open(self.input_filename) as f:
            # Counting with sum() keeps the result defined for an empty
            # file, where a loop variable would never be bound.
            line_total = sum(1 for _ in f)
        print ("total lines in the inputfile: {0} ".format(line_total))
        return line_total

    def find_base_x(self):
        """Compute ``self.base_x``, the line count of the first chunk.

        With N chunks of sizes x, 1.5x, 2x, ..., (1 + 0.5*(N-1))x the total
        is S = x * N*(N+3)/4. The expression below solves that for x and
        rounds up.

        Raises:
            ValueError: if ``split_count`` is smaller than 1, for which the
                formula divides by zero or yields a negative size.
        """
        if self.split_count < 1:
            raise ValueError(
                "split_count must be a positive integer, got {0!r}".format(self.split_count))
        # (N+1)*(N+2)/2 - 1 equals N*(N+3)/2, so this is 4*S / (N*(N+3)).
        self.base_x = math.ceil(float(2*self.total_lines)/(float((self.split_count+1)*(self.split_count+2)/2) - 1))
        print ("base_x is ", self.base_x)
if __name__ == '__main__':
    # Command line: <script> <input_file> <split_count>
    # Extra trailing arguments are ignored.
    if len(sys.argv) < 3:
        print ("usage: {prog} <input_file> <split_count>".format(prog=sys.argv[0]))
        sys.exit(1)
    input_file = sys.argv[1]
    try:
        split_count = int(sys.argv[2])
    except ValueError:
        print ("Error: split_count must be an integer, got {value!r}".format(value=sys.argv[2]))
        sys.exit(1)
    if split_count < 1:
        # The chunk-size formula needs at least one chunk.
        print ("Error: split_count must be at least 1, got {value}".format(value=split_count))
        sys.exit(1)
    params = {
        'split_count': split_count,
        'input_filename': input_file,
    }
    print ("spliting {inputfile} in {count} chunks".format(inputfile=input_file,count=split_count))
    splitter = Spliter(params)
    splitter.split()
    print ("splitting done!")