@@ -1290,3 +1290,64 @@ def test_to_parquet_decimal(session, bucket, database):
     assert df2[df2.id == 2].iloc[0].decimal_5 is None
     assert df2[df2.id == 3].iloc[0].decimal_2 == Decimal((0, (1, 9, 0), -2))
     assert df2[df2.id == 3].iloc[0].decimal_5 == Decimal((0, (1, 9, 0, 0, 0, 0), -5))
+
+
+def test_read_parquet_dataset(session, bucket):
+    df = pd.DataFrame({
+        "id": [1, 2, 3],
+        "decimal_2": [Decimal((0, (1, 9, 9), -2)),
+                      Decimal((0, (1, 9, 9), -2)),
+                      Decimal((0, (1, 9, 0), -2))],
+        "decimal_5": [
+            Decimal((0, (1, 9, 9, 9, 9, 9), -5)),
+            Decimal((0, (1, 9, 9, 9, 9, 9), -5)),
+            Decimal((0, (1, 9, 0, 0, 0, 0), -5))
+        ],
+        "float": [1.1, 2.2, 3.3],
+        "list_int": [[1, 2], [1], [3, 4, 5]],
+        "list_float": [[1.0, 2.0, 3.0], [9.9], [4.0, 5.0]],
+        "list_string": [["foo"], ["xxx"], ["boo", "bar"]],
+        "list_timestamp": [[datetime(2019, 1, 1), datetime(2019, 1, 2)], [datetime(2019, 1, 3)], [datetime(2019, 1,
+                                                                                                            3)]],
+        "partition": [0, 0, 1]
+    })
+    path = f"s3://{bucket}/test_read_parquet/"
+    session.pandas.to_parquet(dataframe=df,
+                              path=path,
+                              mode="overwrite",
+                              preserve_index=False,
+                              procs_cpu_bound=4,
+                              partition_cols=["partition"])
+    df2 = session.pandas.read_parquet(path=path)
+    assert len(list(df.columns)) == len(list(df2.columns))
+    assert len(df.index) == len(df2.index)
+
+
+def test_read_parquet_file(session, bucket):
+    df = pd.DataFrame({
+        "id": [1, 2, 3],
+        "decimal_2": [Decimal((0, (1, 9, 9), -2)),
+                      Decimal((0, (1, 9, 9), -2)),
+                      Decimal((0, (1, 9, 0), -2))],
+        "decimal_5": [
+            Decimal((0, (1, 9, 9, 9, 9, 9), -5)),
+            Decimal((0, (1, 9, 9, 9, 9, 9), -5)),
+            Decimal((0, (1, 9, 0, 0, 0, 0), -5))
+        ],
+        "float": [1.1, 2.2, 3.3],
+        "list_int": [[1, 2], [1], [3, 4, 5]],
+        "list_float": [[1.0, 2.0, 3.0], [9.9], [4.0, 5.0]],
+        "list_string": [["foo"], ["xxx"], ["boo", "bar"]],
+        "list_timestamp": [[datetime(2019, 1, 1), datetime(2019, 1, 2)], [datetime(2019, 1, 3)], [datetime(2019, 1,
+                                                                                                            3)]],
+        "partition": [0, 0, 1]
+    })
+    path = f"s3://{bucket}/test_read_parquet/"
+    filepath = session.pandas.to_parquet(dataframe=df,
+                                         path=path,
+                                         mode="overwrite",
+                                         preserve_index=False,
+                                         procs_cpu_bound=1)
+    df2 = session.pandas.read_parquet(path=filepath[0])
+    assert len(list(df.columns)) == len(list(df2.columns))
+    assert len(df.index) == len(df2.index)
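
Taken together, the two new tests exercise the full Parquet round trip through the session.pandas API: to_parquet() writes the DataFrame to an S3 prefix (optionally partitioned, returning the list of object paths it wrote), and read_parquet() accepts either the dataset prefix or a single file path. A minimal usage sketch follows; the awswrangler.Session() constructor and the bucket name are illustrative assumptions added for context, since the tests receive session and bucket from fixtures:

    import awswrangler
    import pandas as pd

    # Assumption: a default Session; the test suite builds this via a fixture.
    session = awswrangler.Session()

    df = pd.DataFrame({"id": [1, 2, 3], "partition": [0, 0, 1]})
    path = "s3://my-bucket/test_read_parquet/"  # hypothetical bucket

    # Dataset round trip: write a partitioned dataset, then read the whole prefix back.
    session.pandas.to_parquet(dataframe=df,
                              path=path,
                              mode="overwrite",
                              preserve_index=False,
                              partition_cols=["partition"])
    df_dataset = session.pandas.read_parquet(path=path)

    # Single-file round trip: to_parquet() returns the written object paths,
    # and one of them can be passed straight back to read_parquet().
    filepaths = session.pandas.to_parquet(dataframe=df,
                                          path=path,
                                          mode="overwrite",
                                          preserve_index=False,
                                          procs_cpu_bound=1)
    df_single = session.pandas.read_parquet(path=filepaths[0])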