apache
diff --git a/‎docs/source/user-guide/common-operations/select-and-filter.rst
+2-2 b/‎docs/source/user-guide/common-operations/select-and-filter.rst
+2-2
diff --git a/‎examples/import.py
+5-5 b/‎examples/import.py
+5-5
diff --git a/‎examples/tpch/convert_data_to_parquet.py
+1-1 b/‎examples/tpch/convert_data_to_parquet.py
+1-1
diff --git a/‎examples/tpch/q02_minimum_cost_supplier.py
+6-6 b/‎examples/tpch/q02_minimum_cost_supplier.py
+6-6
diff --git a/‎examples/tpch/q03_shipping_priority.py
+4-4 b/‎examples/tpch/q03_shipping_priority.py
+4-4
diff --git a/‎examples/tpch/q04_order_priority_checking.py
+3-3 b/‎examples/tpch/q04_order_priority_checking.py
+3-3
diff --git a/‎examples/tpch/q05_local_supplier_volume.py
+6-6 b/‎examples/tpch/q05_local_supplier_volume.py
+6-6
diff --git a/‎examples/tpch/q06_forecasting_revenue_change.py
+1-1 b/‎examples/tpch/q06_forecasting_revenue_change.py
+1-1
diff --git a/‎examples/tpch/q07_volume_shipping.py
+5-5 b/‎examples/tpch/q07_volume_shipping.py
+5-5
diff --git a/‎examples/tpch/q08_market_share.py
+8-10 b/‎examples/tpch/q08_market_share.py
+8-10
diff --git a/‎examples/tpch/q09_product_type_profit_measure.py
+6-8 b/‎examples/tpch/q09_product_type_profit_measure.py
+6-8
diff --git a/‎examples/tpch/q10_returned_item_reporting.py
+5-5 b/‎examples/tpch/q10_returned_item_reporting.py
+5-5
diff --git a/‎examples/tpch/q11_important_stock_identification.py
+4-4 b/‎examples/tpch/q11_important_stock_identification.py
+4-4
diff --git a/‎examples/tpch/q12_ship_mode_order_priority.py
+2-2 b/‎examples/tpch/q12_ship_mode_order_priority.py
+2-2
diff --git a/‎examples/tpch/q13_customer_distribution.py
+2-4 b/‎examples/tpch/q13_customer_distribution.py
+2-4
diff --git a/‎examples/tpch/q14_promotion_effect.py
+2-4 b/‎examples/tpch/q14_promotion_effect.py
+2-4
@@ -33,7 +33,7 @@ DataFusion can work with several file types, to start simple we can use a subset
     
     ctx = SessionContext()
     df = ctx.read_parquet("yellow_trip_data.parquet")
-    df.select_columns("trip_distance", "passenger_count")
+    df.select("trip_distance", "passenger_count")
 
 For mathematical or logical operations use :py:func:`~datafusion.col` to select columns, and give meaningful names to the resulting
 operations using :py:func:`~datafusion.expr.Expr.alias`
@@ -48,7 +48,7 @@ operations using :py:func:`~datafusion.expr.Expr.alias`
 
     Please be aware that all identifiers are effectively made lower-case in SQL, so if your file has capital letters
     (ex: Name) you must put your column name in double quotes or the selection won’t work. As an alternative for simple
-    column selection use :py:func:`~datafusion.dataframe.DataFrame.select_columns` without double quotes
+    column selection use :py:func:`~datafusion.dataframe.DataFrame.select` without double quotes.
 
 For selecting columns with capital letters use ``'"VendorID"'``
 
 
@@ -28,7 +28,7 @@
 # The dictionary keys represent column names and the dictionary values
 # represent column values
 df = ctx.from_pydict({"a": [1, 2, 3], "b": [4, 5, 6]})
-assert type(df) == datafusion.DataFrame
+assert type(df) is datafusion.DataFrame
 # Dataframe:
 # +---+---+
 # | a | b |
@@ -40,19 +40,19 @@
 
 # Create a datafusion DataFrame from a Python list of rows
 df = ctx.from_pylist([{"a": 1, "b": 4}, {"a": 2, "b": 5}, {"a": 3, "b": 6}])
-assert type(df) == datafusion.DataFrame
+assert type(df) is datafusion.DataFrame
 
 # Convert pandas DataFrame to datafusion DataFrame
 pandas_df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
 df = ctx.from_pandas(pandas_df)
-assert type(df) == datafusion.DataFrame
+assert type(df) is datafusion.DataFrame
 
 # Convert polars DataFrame to datafusion DataFrame
 polars_df = pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
 df = ctx.from_polars(polars_df)
-assert type(df) == datafusion.DataFrame
+assert type(df) is datafusion.DataFrame
 
 # Convert Arrow Table to datafusion DataFrame
 arrow_table = pa.Table.from_pydict({"a": [1, 2, 3], "b": [4, 5, 6]})
 df = ctx.from_arrow(arrow_table)
-assert type(df) == datafusion.DataFrame
+assert type(df) is datafusion.DataFrame
@@ -138,6 +138,6 @@
 
     df = ctx.read_csv(source_file, schema=schema, has_header=False, delimiter="|")
 
-    df = df.select_columns(*output_cols)
+    df = df.select(*output_cols)
 
     df.write_parquet(dest_file, compression="snappy")
@@ -43,10 +43,10 @@
 
 ctx = SessionContext()
 
-df_part = ctx.read_parquet(get_data_path("part.parquet")).select_columns(
+df_part = ctx.read_parquet(get_data_path("part.parquet")).select(
     "p_partkey", "p_mfgr", "p_type", "p_size"
 )
-df_supplier = ctx.read_parquet(get_data_path("supplier.parquet")).select_columns(
+df_supplier = ctx.read_parquet(get_data_path("supplier.parquet")).select(
     "s_acctbal",
     "s_name",
     "s_address",
@@ -55,13 +55,13 @@
     "s_nationkey",
     "s_suppkey",
 )
-df_partsupp = ctx.read_parquet(get_data_path("partsupp.parquet")).select_columns(
+df_partsupp = ctx.read_parquet(get_data_path("partsupp.parquet")).select(
     "ps_partkey", "ps_suppkey", "ps_supplycost"
 )
-df_nation = ctx.read_parquet(get_data_path("nation.parquet")).select_columns(
+df_nation = ctx.read_parquet(get_data_path("nation.parquet")).select(
     "n_nationkey", "n_regionkey", "n_name"
 )
-df_region = ctx.read_parquet(get_data_path("region.parquet")).select_columns(
+df_region = ctx.read_parquet(get_data_path("region.parquet")).select(
     "r_regionkey", "r_name"
 )
 
@@ -115,7 +115,7 @@
 
 # From the problem statement, these are the values we wish to output
 
-df = df.select_columns(
+df = df.select(
     "s_acctbal",
     "s_name",
     "n_name",
 
@@ -37,13 +37,13 @@
 
 ctx = SessionContext()
 
-df_customer = ctx.read_parquet(get_data_path("customer.parquet")).select_columns(
+df_customer = ctx.read_parquet(get_data_path("customer.parquet")).select(
     "c_mktsegment", "c_custkey"
 )
-df_orders = ctx.read_parquet(get_data_path("orders.parquet")).select_columns(
+df_orders = ctx.read_parquet(get_data_path("orders.parquet")).select(
     "o_orderdate", "o_shippriority", "o_custkey", "o_orderkey"
 )
-df_lineitem = ctx.read_parquet(get_data_path("lineitem.parquet")).select_columns(
+df_lineitem = ctx.read_parquet(get_data_path("lineitem.parquet")).select(
     "l_orderkey", "l_extendedprice", "l_discount", "l_shipdate"
 )
 
@@ -80,7 +80,7 @@
 
 # Change the order that the columns are reported in just to match the spec
 
-df = df.select_columns("l_orderkey", "revenue", "o_orderdate", "o_shippriority")
+df = df.select("l_orderkey", "revenue", "o_orderdate", "o_shippriority")
 
 # Show result
 
 
@@ -39,10 +39,10 @@
 
 ctx = SessionContext()
 
-df_orders = ctx.read_parquet(get_data_path("orders.parquet")).select_columns(
+df_orders = ctx.read_parquet(get_data_path("orders.parquet")).select(
     "o_orderdate", "o_orderpriority", "o_orderkey"
 )
-df_lineitem = ctx.read_parquet(get_data_path("lineitem.parquet")).select_columns(
+df_lineitem = ctx.read_parquet(get_data_path("lineitem.parquet")).select(
     "l_orderkey", "l_commitdate", "l_receiptdate"
 )
 
@@ -54,7 +54,7 @@
 # Limit results to cases where commitment date before receipt date
 # Aggregate the results so we only get one row to join with the order table.
 # Alternatively, and likely more idiomatically, instead of `.aggregate` you could
-# do `.select_columns("l_orderkey").distinct()`. The goal here is to show
+# do `.select("l_orderkey").distinct()`. The goal here is to show
 # multiple examples of how to use DataFusion.
 df_lineitem = df_lineitem.filter(col("l_commitdate") < col("l_receiptdate")).aggregate(
     [col("l_orderkey")], []
 
@@ -47,22 +47,22 @@
 
 ctx = SessionContext()
 
-df_customer = ctx.read_parquet(get_data_path("customer.parquet")).select_columns(
+df_customer = ctx.read_parquet(get_data_path("customer.parquet")).select(
     "c_custkey", "c_nationkey"
 )
-df_orders = ctx.read_parquet(get_data_path("orders.parquet")).select_columns(
+df_orders = ctx.read_parquet(get_data_path("orders.parquet")).select(
     "o_custkey", "o_orderkey", "o_orderdate"
 )
-df_lineitem = ctx.read_parquet(get_data_path("lineitem.parquet")).select_columns(
+df_lineitem = ctx.read_parquet(get_data_path("lineitem.parquet")).select(
     "l_orderkey", "l_suppkey", "l_extendedprice", "l_discount"
 )
-df_supplier = ctx.read_parquet(get_data_path("supplier.parquet")).select_columns(
+df_supplier = ctx.read_parquet(get_data_path("supplier.parquet")).select(
     "s_suppkey", "s_nationkey"
 )
-df_nation = ctx.read_parquet(get_data_path("nation.parquet")).select_columns(
+df_nation = ctx.read_parquet(get_data_path("nation.parquet")).select(
     "n_nationkey", "n_regionkey", "n_name"
 )
-df_region = ctx.read_parquet(get_data_path("region.parquet")).select_columns(
+df_region = ctx.read_parquet(get_data_path("region.parquet")).select(
     "r_regionkey", "r_name"
 )
 
 
@@ -51,7 +51,7 @@
 
 ctx = SessionContext()
 
-df_lineitem = ctx.read_parquet(get_data_path("lineitem.parquet")).select_columns(
+df_lineitem = ctx.read_parquet(get_data_path("lineitem.parquet")).select(
     "l_shipdate", "l_quantity", "l_extendedprice", "l_discount"
 )
 
 
@@ -49,19 +49,19 @@
 
 ctx = SessionContext()
 
-df_supplier = ctx.read_parquet(get_data_path("supplier.parquet")).select_columns(
+df_supplier = ctx.read_parquet(get_data_path("supplier.parquet")).select(
     "s_suppkey", "s_nationkey"
 )
-df_lineitem = ctx.read_parquet(get_data_path("lineitem.parquet")).select_columns(
+df_lineitem = ctx.read_parquet(get_data_path("lineitem.parquet")).select(
     "l_shipdate", "l_extendedprice", "l_discount", "l_suppkey", "l_orderkey"
 )
-df_orders = ctx.read_parquet(get_data_path("orders.parquet")).select_columns(
+df_orders = ctx.read_parquet(get_data_path("orders.parquet")).select(
     "o_orderkey", "o_custkey"
 )
-df_customer = ctx.read_parquet(get_data_path("customer.parquet")).select_columns(
+df_customer = ctx.read_parquet(get_data_path("customer.parquet")).select(
     "c_custkey", "c_nationkey"
 )
-df_nation = ctx.read_parquet(get_data_path("nation.parquet")).select_columns(
+df_nation = ctx.read_parquet(get_data_path("nation.parquet")).select(
     "n_nationkey", "n_name"
 )
 
 
@@ -47,25 +47,23 @@
 
 ctx = SessionContext()
 
-df_part = ctx.read_parquet(get_data_path("part.parquet")).select_columns(
-    "p_partkey", "p_type"
-)
-df_supplier = ctx.read_parquet(get_data_path("supplier.parquet")).select_columns(
+df_part = ctx.read_parquet(get_data_path("part.parquet")).select("p_partkey", "p_type")
+df_supplier = ctx.read_parquet(get_data_path("supplier.parquet")).select(
     "s_suppkey", "s_nationkey"
 )
-df_lineitem = ctx.read_parquet(get_data_path("lineitem.parquet")).select_columns(
+df_lineitem = ctx.read_parquet(get_data_path("lineitem.parquet")).select(
     "l_partkey", "l_extendedprice", "l_discount", "l_suppkey", "l_orderkey"
 )
-df_orders = ctx.read_parquet(get_data_path("orders.parquet")).select_columns(
+df_orders = ctx.read_parquet(get_data_path("orders.parquet")).select(
     "o_orderkey", "o_custkey", "o_orderdate"
 )
-df_customer = ctx.read_parquet(get_data_path("customer.parquet")).select_columns(
+df_customer = ctx.read_parquet(get_data_path("customer.parquet")).select(
     "c_custkey", "c_nationkey"
 )
-df_nation = ctx.read_parquet(get_data_path("nation.parquet")).select_columns(
+df_nation = ctx.read_parquet(get_data_path("nation.parquet")).select(
     "n_nationkey", "n_name", "n_regionkey"
 )
-df_region = ctx.read_parquet(get_data_path("region.parquet")).select_columns(
+df_region = ctx.read_parquet(get_data_path("region.parquet")).select(
     "r_regionkey", "r_name"
 )
 
@@ -133,7 +131,7 @@
 
 # When we join to the customer dataframe, we don't want to confuse other columns, so only
 # select the supplier key that we need
-df_national_suppliers = df_national_suppliers.select_columns("s_suppkey")
+df_national_suppliers = df_national_suppliers.select("s_suppkey")
 
 
 # Part 3: Combine suppliers and customers and compute the market share
 
@@ -39,27 +39,25 @@
 
 ctx = SessionContext()
 
-df_part = ctx.read_parquet(get_data_path("part.parquet")).select_columns(
-    "p_partkey", "p_name"
-)
-df_supplier = ctx.read_parquet(get_data_path("supplier.parquet")).select_columns(
+df_part = ctx.read_parquet(get_data_path("part.parquet")).select("p_partkey", "p_name")
+df_supplier = ctx.read_parquet(get_data_path("supplier.parquet")).select(
     "s_suppkey", "s_nationkey"
 )
-df_partsupp = ctx.read_parquet(get_data_path("partsupp.parquet")).select_columns(
+df_partsupp = ctx.read_parquet(get_data_path("partsupp.parquet")).select(
     "ps_suppkey", "ps_partkey", "ps_supplycost"
 )
-df_lineitem = ctx.read_parquet(get_data_path("lineitem.parquet")).select_columns(
+df_lineitem = ctx.read_parquet(get_data_path("lineitem.parquet")).select(
     "l_partkey",
     "l_extendedprice",
     "l_discount",
     "l_suppkey",
     "l_orderkey",
     "l_quantity",
 )
-df_orders = ctx.read_parquet(get_data_path("orders.parquet")).select_columns(
+df_orders = ctx.read_parquet(get_data_path("orders.parquet")).select(
     "o_orderkey", "o_custkey", "o_orderdate"
 )
-df_nation = ctx.read_parquet(get_data_path("nation.parquet")).select_columns(
+df_nation = ctx.read_parquet(get_data_path("nation.parquet")).select(
     "n_nationkey", "n_name", "n_regionkey"
 )
 
 
@@ -44,7 +44,7 @@
 
 ctx = SessionContext()
 
-df_customer = ctx.read_parquet(get_data_path("customer.parquet")).select_columns(
+df_customer = ctx.read_parquet(get_data_path("customer.parquet")).select(
     "c_custkey",
     "c_nationkey",
     "c_name",
@@ -53,13 +53,13 @@
     "c_phone",
     "c_comment",
 )
-df_lineitem = ctx.read_parquet(get_data_path("lineitem.parquet")).select_columns(
+df_lineitem = ctx.read_parquet(get_data_path("lineitem.parquet")).select(
     "l_extendedprice", "l_discount", "l_orderkey", "l_returnflag"
 )
-df_orders = ctx.read_parquet(get_data_path("orders.parquet")).select_columns(
+df_orders = ctx.read_parquet(get_data_path("orders.parquet")).select(
     "o_orderkey", "o_custkey", "o_orderdate"
 )
-df_nation = ctx.read_parquet(get_data_path("nation.parquet")).select_columns(
+df_nation = ctx.read_parquet(get_data_path("nation.parquet")).select(
     "n_nationkey", "n_name", "n_regionkey"
 )
 
@@ -87,7 +87,7 @@
 df = df.join(df_nation, (["c_nationkey"], ["n_nationkey"]), how="inner")
 
 # These are the columns the problem statement requires
-df = df.select_columns(
+df = df.select(
     "c_custkey",
     "c_name",
     "revenue",
 
@@ -37,13 +37,13 @@
 
 ctx = SessionContext()
 
-df_supplier = ctx.read_parquet(get_data_path("supplier.parquet")).select_columns(
+df_supplier = ctx.read_parquet(get_data_path("supplier.parquet")).select(
     "s_suppkey", "s_nationkey"
 )
-df_partsupp = ctx.read_parquet(get_data_path("partsupp.parquet")).select_columns(
+df_partsupp = ctx.read_parquet(get_data_path("partsupp.parquet")).select(
     "ps_supplycost", "ps_availqty", "ps_suppkey", "ps_partkey"
 )
-df_nation = ctx.read_parquet(get_data_path("nation.parquet")).select_columns(
+df_nation = ctx.read_parquet(get_data_path("nation.parquet")).select(
     "n_nationkey", "n_name"
 )
 
@@ -75,7 +75,7 @@
 df = df.filter(col("value") / col("total_value") >= lit(FRACTION))
 
 # We only need to report on these two columns
-df = df.select_columns("ps_partkey", "value")
+df = df.select("ps_partkey", "value")
 
 # Sort in descending order of value
 df = df.sort(col("value").sort(ascending=False))
 
@@ -42,10 +42,10 @@
 
 ctx = SessionContext()
 
-df_orders = ctx.read_parquet(get_data_path("orders.parquet")).select_columns(
+df_orders = ctx.read_parquet(get_data_path("orders.parquet")).select(
     "o_orderkey", "o_orderpriority"
 )
-df_lineitem = ctx.read_parquet(get_data_path("lineitem.parquet")).select_columns(
+df_lineitem = ctx.read_parquet(get_data_path("lineitem.parquet")).select(
     "l_orderkey", "l_shipmode", "l_commitdate", "l_shipdate", "l_receiptdate"
 )
 
 
@@ -38,12 +38,10 @@
 
 ctx = SessionContext()
 
-df_orders = ctx.read_parquet(get_data_path("orders.parquet")).select_columns(
+df_orders = ctx.read_parquet(get_data_path("orders.parquet")).select(
     "o_custkey", "o_comment"
 )
-df_customer = ctx.read_parquet(get_data_path("customer.parquet")).select_columns(
-    "c_custkey"
-)
+df_customer = ctx.read_parquet(get_data_path("customer.parquet")).select("c_custkey")
 
 # Use a regex to remove special cases
 df_orders = df_orders.filter(
 
@@ -41,12 +41,10 @@
 
 ctx = SessionContext()
 
-df_lineitem = ctx.read_parquet(get_data_path("lineitem.parquet")).select_columns(
+df_lineitem = ctx.read_parquet(get_data_path("lineitem.parquet")).select(
     "l_partkey", "l_shipdate", "l_extendedprice", "l_discount"
 )
-df_part = ctx.read_parquet(get_data_path("part.parquet")).select_columns(
-    "p_partkey", "p_type"
-)
+df_part = ctx.read_parquet(get_data_path("part.parquet")).select("p_partkey", "p_type")
 
 
 # Check part type begins with PROMO
Original file line number	Diff line number	Diff line change
`@@ -37,13 +37,13 @@`
`37`	`37`
`38`	`38`	`ctx = SessionContext()`
`39`	`39`
`40`		`-df_customer = ctx.read_parquet(get_data_path("customer.parquet")).select_columns(`
	`40`	`+df_customer = ctx.read_parquet(get_data_path("customer.parquet")).select(`
`41`	`41`	`"c_mktsegment", "c_custkey"`
`42`	`42`	`)`
`43`		`-df_orders = ctx.read_parquet(get_data_path("orders.parquet")).select_columns(`
	`43`	`+df_orders = ctx.read_parquet(get_data_path("orders.parquet")).select(`
`44`	`44`	`"o_orderdate", "o_shippriority", "o_custkey", "o_orderkey"`
`45`	`45`	`)`
`46`		`-df_lineitem = ctx.read_parquet(get_data_path("lineitem.parquet")).select_columns(`
	`46`	`+df_lineitem = ctx.read_parquet(get_data_path("lineitem.parquet")).select(`
`47`	`47`	`"l_orderkey", "l_extendedprice", "l_discount", "l_shipdate"`
`48`	`48`	`)`
`49`	`49`
`@@ -80,7 +80,7 @@`
`80`	`80`
`81`	`81`	`# Change the order that the columns are reported in just to match the spec`
`82`	`82`
`83`		`-df = df.select_columns("l_orderkey", "revenue", "o_orderdate", "o_shippriority")`
	`83`	`+df = df.select("l_orderkey", "revenue", "o_orderdate", "o_shippriority")`
`84`	`84`
`85`	`85`	`# Show result`
`86`	`86`
Original file line number	Diff line number	Diff line change
`@@ -47,22 +47,22 @@`
`47`	`47`
`48`	`48`	`ctx = SessionContext()`
`49`	`49`
`50`		`-df_customer = ctx.read_parquet(get_data_path("customer.parquet")).select_columns(`
	`50`	`+df_customer = ctx.read_parquet(get_data_path("customer.parquet")).select(`
`51`	`51`	`"c_custkey", "c_nationkey"`
`52`	`52`	`)`
`53`		`-df_orders = ctx.read_parquet(get_data_path("orders.parquet")).select_columns(`
	`53`	`+df_orders = ctx.read_parquet(get_data_path("orders.parquet")).select(`
`54`	`54`	`"o_custkey", "o_orderkey", "o_orderdate"`
`55`	`55`	`)`
`56`		`-df_lineitem = ctx.read_parquet(get_data_path("lineitem.parquet")).select_columns(`
	`56`	`+df_lineitem = ctx.read_parquet(get_data_path("lineitem.parquet")).select(`
`57`	`57`	`"l_orderkey", "l_suppkey", "l_extendedprice", "l_discount"`
`58`	`58`	`)`
`59`		`-df_supplier = ctx.read_parquet(get_data_path("supplier.parquet")).select_columns(`
	`59`	`+df_supplier = ctx.read_parquet(get_data_path("supplier.parquet")).select(`
`60`	`60`	`"s_suppkey", "s_nationkey"`
`61`	`61`	`)`
`62`		`-df_nation = ctx.read_parquet(get_data_path("nation.parquet")).select_columns(`
	`62`	`+df_nation = ctx.read_parquet(get_data_path("nation.parquet")).select(`
`63`	`63`	`"n_nationkey", "n_regionkey", "n_name"`
`64`	`64`	`)`
`65`		`-df_region = ctx.read_parquet(get_data_path("region.parquet")).select_columns(`
	`65`	`+df_region = ctx.read_parquet(get_data_path("region.parquet")).select(`
`66`	`66`	`"r_regionkey", "r_name"`
`67`	`67`	`)`
`68`	`68`
Original file line number	Diff line number	Diff line change
`@@ -51,7 +51,7 @@`
`51`	`51`
`52`	`52`	`ctx = SessionContext()`
`53`	`53`
`54`		`-df_lineitem = ctx.read_parquet(get_data_path("lineitem.parquet")).select_columns(`
	`54`	`+df_lineitem = ctx.read_parquet(get_data_path("lineitem.parquet")).select(`
`55`	`55`	`"l_shipdate", "l_quantity", "l_extendedprice", "l_discount"`
`56`	`56`	`)`
`57`	`57`
Original file line number	Diff line number	Diff line change
`@@ -49,19 +49,19 @@`
`49`	`49`
`50`	`50`	`ctx = SessionContext()`
`51`	`51`
`52`		`-df_supplier = ctx.read_parquet(get_data_path("supplier.parquet")).select_columns(`
	`52`	`+df_supplier = ctx.read_parquet(get_data_path("supplier.parquet")).select(`
`53`	`53`	`"s_suppkey", "s_nationkey"`
`54`	`54`	`)`
`55`		`-df_lineitem = ctx.read_parquet(get_data_path("lineitem.parquet")).select_columns(`
	`55`	`+df_lineitem = ctx.read_parquet(get_data_path("lineitem.parquet")).select(`
`56`	`56`	`"l_shipdate", "l_extendedprice", "l_discount", "l_suppkey", "l_orderkey"`
`57`	`57`	`)`
`58`		`-df_orders = ctx.read_parquet(get_data_path("orders.parquet")).select_columns(`
	`58`	`+df_orders = ctx.read_parquet(get_data_path("orders.parquet")).select(`
`59`	`59`	`"o_orderkey", "o_custkey"`
`60`	`60`	`)`
`61`		`-df_customer = ctx.read_parquet(get_data_path("customer.parquet")).select_columns(`
	`61`	`+df_customer = ctx.read_parquet(get_data_path("customer.parquet")).select(`
`62`	`62`	`"c_custkey", "c_nationkey"`
`63`	`63`	`)`
`64`		`-df_nation = ctx.read_parquet(get_data_path("nation.parquet")).select_columns(`
	`64`	`+df_nation = ctx.read_parquet(get_data_path("nation.parquet")).select(`
`65`	`65`	`"n_nationkey", "n_name"`
`66`	`66`	`)`
`67`	`67`
Original file line number	Diff line number	Diff line change
`@@ -42,10 +42,10 @@`
`42`	`42`
`43`	`43`	`ctx = SessionContext()`
`44`	`44`
`45`		`-df_orders = ctx.read_parquet(get_data_path("orders.parquet")).select_columns(`
	`45`	`+df_orders = ctx.read_parquet(get_data_path("orders.parquet")).select(`
`46`	`46`	`"o_orderkey", "o_orderpriority"`
`47`	`47`	`)`
`48`		`-df_lineitem = ctx.read_parquet(get_data_path("lineitem.parquet")).select_columns(`
	`48`	`+df_lineitem = ctx.read_parquet(get_data_path("lineitem.parquet")).select(`
`49`	`49`	`"l_orderkey", "l_shipmode", "l_commitdate", "l_shipdate", "l_receiptdate"`
`50`	`50`	`)`
`51`	`51`