1
- from metacat .util import fetch_generator
2
- import json
1
+ from metacat .util import fetch_generator , chunked
2
+ import json , io , csv
3
3
4
4
def transactioned (method ):
5
5
def decorated (first , * params , transaction = None , ** args ):
6
6
7
7
if transaction is not None :
8
8
return method (first , * params , transaction = transaction , ** args )
9
-
9
+
10
10
if isinstance (first , HasDB ):
11
11
transaction = first .DB .transaction ()
12
12
elif isinstance (first , type ):
@@ -20,37 +20,31 @@ def decorated(first, *params, transaction=None, **args):
20
20
21
21
return decorated
22
22
23
- def insert_many (transaction , table , column_names , tuples , copy_threshold = 100 ):
24
-
25
- # if the tuples list or iterable is short enough, do it as multiple inserts
26
- tuples_lst , tuples = make_list_if_short (tuples , copy_threshold )
27
- if tuples_lst is not None and len (tuples_lst ) <= copy_threshold :
28
- columns = "," . join (column_names )
29
- placeholders = "," .join (["%s" ]* len (column_names ))
30
- try :
31
- transaction .executemany (f"""
32
- insert into parent_child({ columns } ) values({ placeholders } )
33
- """ , tuples_lst )
34
- if do_commit : cursor .execute ("commit" )
35
- except Exception as e :
36
- cursor .execute ("rollback" )
37
- raise
38
- else :
39
-
40
- csv_file = io .StringIO ()
41
- writer = csv .writer (csv_file , delimiter = '\t ' , quoting = csv .QUOTE_MINIMAL )
42
-
43
- for tup in tuples :
44
- assert len (tup ) == len (column_names )
45
- tup = ["\\ N" if x is None else x for x in tup ]
46
- writer .writerow (tup )
47
- csv_file .seek (0 ,0 )
48
- try :
49
- cursor .copy_from (csv_file , table , columns = column_names )
50
- if do_commit : cursor .execute ("commit" )
51
- except Exception as e :
52
- cursor .execute ("rollback" )
53
- raise
23
@transactioned
def insert_many(db, table, items, column_names=None, copy_threshold=0, chunk_size=1000, make_tuple=None, transaction=None):
    """Insert an iterable of rows into a table in chunks.

    Small chunks (len <= copy_threshold) go through a multi-row
    ``executemany`` INSERT; larger chunks are streamed through the much
    faster PostgreSQL COPY protocol via ``transaction.copy_from``.

    Arguments:
        db             - object with a .DB connection pool; used by @transactioned
                         to open a transaction when one is not supplied
        table          - name of the target table
        items          - iterable of rows (tuples) to insert
        column_names   - optional list of column names; when None, rows must
                         match the table's natural column order
        copy_threshold - chunks of this size or smaller use executemany,
                         larger ones use COPY (default 0: always COPY)
        chunk_size     - number of rows fetched from items per chunk
        make_tuple     - optional callable mapping each item to a row tuple
        transaction    - explicit open transaction; when None, @transactioned
                         creates and manages one
    """
    for chunk in chunked(items, chunk_size):
        if not chunk:
            continue                    # e.g. empty trailing chunk
        if make_tuple is not None:
            chunk = [make_tuple(item) for item in chunk]
        if len(chunk) <= copy_threshold:
            # Small chunk: plain parameterized multi-row insert.
            cols = "" if not column_names else "(" + ",".join(column_names) + ")"
            ncols = len(column_names) if column_names else len(chunk[0])
            vals = ",".join(["%s"] * ncols)
            sql = f"insert into {table} {cols} values({vals})"
            transaction.executemany(sql, chunk)
        else:
            # Large chunk: serialize to a tab-delimited buffer and COPY it in.
            csv_file = io.StringIO()
            writer = csv.writer(csv_file, delimiter='\t', quoting=csv.QUOTE_MINIMAL)
            for tup in chunk:
                # Only check row width when an explicit column list was given;
                # len(None) would raise TypeError otherwise.
                if column_names is not None:
                    assert len(tup) == len(column_names)
                tup = ["\\N" if x is None else x for x in tup]   # \N = NULL in Postgres COPY
                writer.writerow(tup)
            csv_file.seek(0, 0)
            transaction.copy_from(csv_file, table, columns=column_names)
54
48
55
49
56
50
class HasDB (object ):
0 commit comments